/*
 * Attach caller-provided split storage to matrix A.
 *
 * `diag` becomes the value array of a newly created diagonal object, and the
 * (lptr, lindex, lvalue) / (uptr, uindex, uvalue) triples become the L and U
 * cores.  All arrays are stored by pointer; nothing is copied.
 *
 * Returns
 *   LIS_SUCCESS        on success, or immediately (A untouched) when A is
 *                      already assembled;
 *   LIS_OUT_OF_MEMORY  when a core structure cannot be allocated;
 *   otherwise the error reported by lis_matrix_check() or
 *   lis_matrix_diag_create().
 */
LIS_INT lis_matrix_setDLU_csr(LIS_INT nnzl, LIS_INT nnzu, LIS_SCALAR *diag, LIS_INT *lptr, LIS_INT *lindex, LIS_SCALAR *lvalue,
              LIS_INT *uptr, LIS_INT *uindex, LIS_SCALAR *uvalue, LIS_MATRIX A)
{
  LIS_MATRIX_DIAG  dmat;
  LIS_INT          rc;

  LIS_DEBUG_FUNC_IN;

  /* an assembled matrix is left exactly as it is */
  if( lis_matrix_is_assembled(A) )
  {
    return LIS_SUCCESS;
  }

  rc = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
  if( rc )
  {
    return rc;
  }

  /* container for the L part */
  A->L = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT), "lis_matrix_setDLU_csr::A->L");
  if( A->L==NULL )
  {
    LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
    return LIS_OUT_OF_MEMORY;
  }

  /* container for the U part */
  A->U = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT), "lis_matrix_setDLU_csr::A->U");
  if( A->U==NULL )
  {
    LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
    lis_matrix_DLU_destroy(A);
    return LIS_OUT_OF_MEMORY;
  }

  /* diagonal object */
  rc = lis_matrix_diag_create(A->n,0,A->comm,&dmat);
  if( rc )
  {
    lis_matrix_DLU_destroy(A);
    return rc;
  }

  /* replace the value array that came with the diagonal object by the caller's */
  lis_free(dmat->value);
  dmat->value = diag;
  A->D        = dmat;

  /* lower part */
  A->L->nnz   = nnzl;
  A->L->ptr   = lptr;
  A->L->index = lindex;
  A->L->value = lvalue;

  /* upper part */
  A->U->nnz   = nnzu;
  A->U->ptr   = uptr;
  A->U->index = uindex;
  A->U->value = uvalue;

  /* bookkeeping flags */
  A->is_copy    = LIS_FALSE;
  A->status     = -LIS_MATRIX_CSR;
  A->is_splited = LIS_TRUE;

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
/*
 * Read a matrix from `file` through lis_input_hb_csr() and, when the storage
 * type A had on entry is neither CSR nor CSC, convert A to that type in place.
 *
 * Returns LIS_SUCCESS, LIS_OUT_OF_MEMORY when the JAD work array cannot be
 * allocated, or the first error reported by a callee.
 */
LIS_INT lis_input_hb(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file)
{
  LIS_MATRIX  converted;
  LIS_INT     wanted_type;
  LIS_INT     rc;

  LIS_DEBUG_FUNC_IN;

  /* sampled before reading; the decision below is based on this value */
  wanted_type = A->matrix_type;

  rc = lis_input_hb_csr(A,b,x,file);
  if( rc )
  {
    return rc;
  }

  /* CSR and CSC need no further conversion */
  if( wanted_type==LIS_MATRIX_CSR || wanted_type==LIS_MATRIX_CSC )
  {
    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
  }

  /* build the converted matrix in a temporary ... */
  rc = lis_matrix_duplicate(A,&converted);
  if( rc )
  {
    return rc;
  }
  lis_matrix_set_type(converted,wanted_type);
  rc = lis_matrix_convert(A,converted);
  if( rc )
  {
    return rc;
  }

  /* ... release what A currently holds ... */
  lis_matrix_storage_destroy(A);
  lis_matrix_DLU_destroy(A);
  lis_matrix_diag_destroy(A->WD);
  if( A->l2g_map )
  {
    lis_free( A->l2g_map );
  }
  if( A->commtable )
  {
    lis_commtable_destroy( A->commtable );
  }
  if( A->ranges )
  {
    lis_free( A->ranges );
  }

  /* ... and move the temporary's contents into A */
  rc = lis_matrix_copy_struct(converted,A);
  if( rc )
  {
    return rc;
  }
  lis_free(converted);

  /* JAD matrices get an n-element work array */
  if( A->matrix_type==LIS_MATRIX_JAD )
  {
    A->work = (LIS_SCALAR *)lis_malloc(A->n*sizeof(LIS_SCALAR),"lis_input_hb::A->work");
    if( A->work==NULL )
    {
      LIS_SETERR_MEM(A->n*sizeof(LIS_SCALAR));
      return LIS_OUT_OF_MEMORY;
    }
  }

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
/*
 * Run one linear solve of A x = b using the options stored in `solver` and
 * the preconditioner `precon`.
 *
 * Sequence: validate the solver options, build a working copy `xx` of the
 * initial guess, allocate the residual history, apply the requested scaling
 * to A and b, convert the matrix storage, print a banner (when
 * LIS_OPTIONS_OUTPUT is set), allocate the solver work vectors, execute the
 * solver, copy the result back into x and record the timings in `solver`.
 *
 * Parameters
 *   A, b    system matrix and right-hand side; both may be scaled and A may
 *           be converted in place
 *   x       initial guess on entry (unless LIS_OPTIONS_INITGUESS_ZEROS is
 *           set), solution on return
 *   solver  options, parameters and result fields
 *   precon  preconditioner handed to the solver
 *
 * Returns
 *   an error code when something fails before the solver is executed (the
 *   code is also stored in solver->retcode once parameter checking has
 *   passed).  Once the solver has been executed LIS_SUCCESS is returned
 *   unconditionally; the solver's own status is available in solver->retcode.
 */
LIS_INT lis_solve_kernel(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT			nsolver, precon_type, maxiter;
	LIS_INT			err;
	LIS_SCALAR	*residual;
	LIS_VECTOR	xx;

	LIS_INT output;
	LIS_INT scale;
	LIS_INT conv_cond;
	LIS_INT precision,is_use_at,storage,block;
	LIS_INT i,n;
	double p_c_times, p_i_times,itimes;
	LIS_SCALAR nrm2,tol,tol_w;
	LIS_VECTOR t;
	LIS_VECTOR bb;
	LIS_MATRIX AA,B;
	LIS_MATRIX At;
	char buf[64];

	LIS_DEBUG_FUNC_IN;

	nsolver     = solver->options[LIS_OPTIONS_SOLVER];
	precon_type = solver->options[LIS_OPTIONS_PRECON];
	maxiter     = solver->options[LIS_OPTIONS_MAXITER];
	output      = solver->options[LIS_OPTIONS_OUTPUT];
	scale       = solver->options[LIS_OPTIONS_SCALE];
	precision   = solver->options[LIS_OPTIONS_PRECISION];
	is_use_at   = solver->options[LIS_OPTIONS_USE_AT];
	storage     = solver->options[LIS_OPTIONS_STORAGE];
	block       = solver->options[LIS_OPTIONS_STORAGE_BLOCK];
	conv_cond   = solver->options[LIS_OPTIONS_CONV_COND];
	tol         = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol_w       = solver->params[LIS_PARAMS_RESID_WEIGHT-LIS_OPTIONS_LEN];
	solver->precision = precision;

	/* begin parameter check */
	if( nsolver < 1 || nsolver > LIS_SOLVERS_LEN )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_SOLVER is %d (Set between 1 to %d)\n",nsolver, LIS_SOLVERS_LEN);
		return LIS_ERR_ILL_ARG;
	}
	/* NOTE(review): the test accepts precon_type == precon_register_type while
	   the message advertises precon_register_type-1 as the maximum - confirm
	   which of the two is intended. */
	if( precon_type < 0 || precon_type > precon_register_type )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_PRECON is %d (Set between 0 to %d)\n",precon_type, precon_register_type-1);
		return LIS_ERR_ILL_ARG;
	}
	if( maxiter<0 )
	{
		LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_OPTIONS_MAXITER(=%d) is less than 0\n",maxiter);
		return LIS_ERR_ILL_ARG;
	}
	#ifdef USE_MPI
	if( precon_type == LIS_PRECON_TYPE_SAAMG  && solver->A->nprocs < 2)
	{
		LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter A->nprocs (=%d) is less than 2 (Set more than 1 when using parallel version of SAAMG)\n",solver->A->nprocs);
		return LIS_ERR_ILL_ARG;
	}
	#endif
	#ifdef USE_QUAD_PRECISION
		if( precision==LIS_PRECISION_QUAD && lis_solver_execute_quad[nsolver]==NULL )
		{
			LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision solver %s is not implemented\n",lis_solvername[nsolver]);
			return LIS_ERR_NOT_IMPLEMENTED;
		}
		else if( precision==LIS_PRECISION_SWITCH && lis_solver_execute_switch[nsolver]==NULL )
		{
			LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch solver %s is not implemented\n",lis_solvername[nsolver]);
			return LIS_ERR_NOT_IMPLEMENTED;
		}
		if( solver->options[LIS_OPTIONS_SWITCH_MAXITER]==-1 )
		{
			solver->options[LIS_OPTIONS_SWITCH_MAXITER] = maxiter;
		}
	#endif

	err = lis_solver_check_params[nsolver](solver);
	if( err )
	{
		solver->retcode = err;
		return err;
	}
	/* end parameter check */

	solver->A        = A;
	solver->b        = b;

	/* create initial vector */
	#ifndef USE_QUAD_PRECISION
		err = lis_vector_duplicate(A,&xx);
	#else
		if( precision==LIS_PRECISION_DOUBLE )
		{
			err = lis_vector_duplicate(A,&xx);
		}
		else
		{
			err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx);
		}
	#endif
	if( err )
	{
		solver->retcode = err;
		return err;
	}
	if( solver->options[LIS_OPTIONS_INITGUESS_ZEROS] )
	{
		if( output ) lis_printf(A->comm,"initial vector x = 0\n");
		#ifndef USE_QUAD_PRECISION
			lis_vector_set_all(0.0,xx);
		#else
			if( precision==LIS_PRECISION_DOUBLE )
			{
				lis_vector_set_all(0.0,xx);
			}
			else
			{
				lis_vector_set_allex_nm(0.0,xx);
			}
		#endif
	}
	else
	{
		if( output ) lis_printf(A->comm,"initial vector x = user defined\n");
		#ifndef USE_QUAD_PRECISION
			lis_vector_copy(x,xx);
		#else
			if( precision==LIS_PRECISION_DOUBLE )
			{
				lis_vector_copy(x,xx);
			}
			else
			{
				lis_vector_copyex_nm(x,xx);
			}
		#endif
	}

	/* create residual history vector */
	if( solver->residual )
	{
		lis_free(solver->residual);
		/* the field is only re-assigned just before the solver runs; clear it
		   so that an early error return does not leave a dangling pointer */
		solver->residual = NULL;
	}
	residual = (LIS_SCALAR *)lis_malloc((maxiter+2)*sizeof(LIS_SCALAR),"lis_solve::residual");
	if( residual==NULL )
	{
		LIS_SETERR_MEM((maxiter+2)*sizeof(LIS_SCALAR));
		lis_vector_destroy(xx);
		/* err is still 0 from the successful vector creation above, so the
		   failure has to be reported explicitly */
		solver->retcode = LIS_OUT_OF_MEMORY;
		return LIS_OUT_OF_MEMORY;
	}
	residual[0] = 1.0;


	n       = A->n;
	t       = NULL;
	At      = NULL;

	/*
	 * From here on xx and residual are owned by this call; every failure
	 * before the solver is executed leaves through the `fail` label, which
	 * releases both.
	 */

	p_c_times = lis_wtime();
	if( precon_type==LIS_PRECON_TYPE_IS )
	{
		if( solver->d==NULL )
		{
			err = lis_vector_duplicate(A,&solver->d);
			if( err ) goto fail;
		}
		if( !A->is_scaled )
		{
			lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI);
		}
		else if( !b->is_scaled )
		{
			#ifdef _OPENMP
			#pragma omp parallel for
			#endif
			for(i=0;i<n;i++)
			{
				b->value[i] = b->value[i]*solver->d->value[i];
			}
		}
		if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR )
		{
			solver->options[LIS_OPTIONS_ISLEVEL] = 0;
		}
	}
	else if( nsolver >= LIS_SOLVER_JACOBI && nsolver <= LIS_SOLVER_SOR && precon_type!=LIS_PRECON_TYPE_NONE )
	{
		if( solver->d==NULL )
		{
			err = lis_vector_duplicate(A,&solver->d);
			if( err ) goto fail;
		}
		if( !A->is_scaled )
		{
			lis_matrix_scaling(A,b,solver->d,LIS_SCALE_JACOBI);
		}
	}
	else if( scale )
	{
		if( storage==LIS_MATRIX_BSR && scale==LIS_SCALE_JACOBI )
		{
			if( A->matrix_type!=LIS_MATRIX_BSR )
			{
				/* convert A to BSR in place via a temporary matrix B */
				/* NOTE(review): B is not released on the error paths below */
				err = lis_matrix_duplicate(A,&B);
				if( err ) goto fail;
				lis_matrix_set_blocksize(B,block,block,NULL,NULL);
				lis_matrix_set_type(B,storage);
				err = lis_matrix_convert(A,B);
				if( err ) goto fail;
				lis_matrix_storage_destroy(A);
				lis_matrix_DLU_destroy(A);
				lis_matrix_diag_destroy(A->WD);
				if( A->l2g_map ) lis_free( A->l2g_map );
				if( A->commtable ) lis_commtable_destroy( A->commtable );
				if( A->ranges ) lis_free( A->ranges );
				err = lis_matrix_copy_struct(B,A);
				if( err ) goto fail;
				lis_free(B);
			}
			err = lis_matrix_split(A);
			if( err ) goto fail;
			err = lis_matrix_diag_duplicate(A->D,&solver->WD);
			if( err ) goto fail;
			lis_matrix_diag_copy(A->D,solver->WD);
			lis_matrix_diag_inverse(solver->WD);
			lis_matrix_bscaling_bsr(A,solver->WD);
			/* b <- WD * b, computed through the temporary t */
			err = lis_vector_duplicate(A,&t);
			if( err ) goto fail;
			lis_matrix_diag_matvec(solver->WD,b,t);
			lis_vector_copy(t,b);
			lis_vector_destroy(t);
			t = NULL;
		}
		else
		{
			if( solver->d==NULL )
			{
				err = lis_vector_duplicate(A,&solver->d);
				if( err ) goto fail;
			}
			if( scale==LIS_SCALE_JACOBI && nsolver==LIS_SOLVER_CG )
			{
				scale = LIS_SCALE_SYMM_DIAG;
			}
			if( !A->is_scaled )
			{
				lis_matrix_scaling(A,b,solver->d,scale);
			}
			else if( !b->is_scaled )
			{
				#ifdef _OPENMP
				#pragma omp parallel for
				#endif
				for(i=0;i<n;i++)
				{
					b->value[i] = b->value[i]*solver->d->value[i];
				}
			}
		}
	}

	/* select the system that is actually handed to the solver */
	if( precon_type==LIS_PRECON_TYPE_IS )
	{
		if( nsolver < LIS_SOLVER_JACOBI || nsolver > LIS_SOLVER_SOR )
		{
			AA = solver->A;
			bb = solver->b;
		}
		else
		{
			AA = precon->A;
			bb = precon->Pb;
		}
	}
	else
	{
		AA = A;
		bb = b;
	}

	p_c_times = lis_wtime() - p_c_times;
	itimes = lis_wtime();

	/* Matrix Convert */
	solver->A  = AA;
	solver->b  = bb;
	err = lis_matrix_convert_self(solver);
	if( err )
	{
		lis_solver_work_destroy(solver);
		goto fail;
	}
	block = solver->A->bnr;

	/* banner (rank 0 only, and only when output is requested) */
	if( A->my_rank==0 )
	{
		if( output ) printf("precision : %s\n", lis_precisionname[precision]);
		if( output ) printf("solver    : %s %d\n", lis_solvername[nsolver],nsolver);
		switch( precon_type )
		{
		case LIS_PRECON_TYPE_ILU:
			i = solver->options[LIS_OPTIONS_FILL];
			if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_VBR )
			{
				if( output ) snprintf(buf,sizeof(buf),"Block %s(%d)",lis_preconname[precon_type],i);
			}
			else
			{
				if( output ) snprintf(buf,sizeof(buf),"%s(%d)",lis_preconname[precon_type],i);
			}
			break;
		default:
			if( output ) snprintf(buf,sizeof(buf),"%s",lis_preconname[precon_type]);
			break;
		}
		if( solver->options[LIS_OPTIONS_ADDS] && precon_type )
		{
			if( output ) printf("precon    : %s + additive schwarz\n", buf);
		}
		else
		{
			if( output ) printf("precon    : %s\n", buf);
		}
	}
	switch(conv_cond)
	{
	case LIS_CONV_COND_NRM2_R:
	case LIS_CONV_COND_NRM2_B:
		if( A->my_rank==0 )
		{
			if( output ) printf("conv_cond : ||r||_2 <= %6.1e*||r_0||_2\n", tol);
		}
		break;
	case LIS_CONV_COND_NRM1_B:
		/* the norm is computed on every rank; only the print is rank-0 only */
		lis_vector_nrm1(b,&nrm2);
		nrm2 = nrm2*tol_w + tol;
		if( A->my_rank==0 )
		{
			if( output ) printf("conv_cond : ||r||_1 <= %6.1e*||b||_1 + %6.1e = %6.1e\n", tol_w,tol,nrm2);
		}
		break;
	}
	if( A->my_rank==0 )
	{
		if( AA->matrix_type==LIS_MATRIX_BSR || AA->matrix_type==LIS_MATRIX_BSC )
		{
			if( output ) printf("storage   : %s(%d x %d)\n", lis_storagename[AA->matrix_type-1],block,block);
		}
		else
		{
			if( output ) printf("storage   : %s\n", lis_storagename[AA->matrix_type-1]);
		}
	}


	/* create work vector */
	err = lis_solver_malloc_work[nsolver](solver);
	if( err )
	{
		lis_precon_destroy(precon);
		goto fail;
	}
	if( nsolver==LIS_SOLVER_BICG && is_use_at )
	{
		if( output ) lis_printf(A->comm,"Use At\n");
		lis_matrix_duplicate(AA,&At);
		lis_matrix_set_type(At,LIS_USE_AT_TYPE[AA->matrix_type]);
		lis_matrix_convert(AA,At);
		solver->At = At;
	}

	/* the solver iterates on the working copy; xx keeps the caller's vector */
	solver->x        = xx;
	solver->xx       = x;
	solver->precon   = precon;
	solver->residual = residual;

	/* execute solver */
	#ifndef USE_QUAD_PRECISION
		err = lis_solver_execute[nsolver](solver);
	#else
		if( precision==LIS_PRECISION_DOUBLE )
		{
			err = lis_solver_execute[nsolver](solver);
		}
		else if( precision==LIS_PRECISION_QUAD )
		{
			err = lis_solver_execute_quad[nsolver](solver);
		}
		else if( precision==LIS_PRECISION_SWITCH )
		{
			err = lis_solver_execute_switch[nsolver](solver);
		}
	#endif
	solver->retcode = err;

	/* copy the result back into the caller's vector */
	if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS)
	{
		#ifdef _OPENMP
		#pragma omp parallel for
		#endif
		for(i=0;i<n;i++)
		{
			x->value[i] = xx->value[i]*solver->d->value[i];
		}
	}
	else
	{
		#ifndef USE_QUAD_PRECISION
			lis_vector_copy(xx,x);
		#else
			if( precision==LIS_PRECISION_DOUBLE )
			{
				lis_vector_copy(xx,x);
			}
			else
			{
				lis_vector_copyex_mn(xx,x);
			}
		#endif
	}

	/* timings */
	itimes = lis_wtime() - itimes - solver->ptimes;
	p_i_times = solver->ptimes;
	solver->ptimes = p_c_times + p_i_times;
	solver->p_c_times = p_c_times;
	solver->p_i_times = p_i_times;
	solver->times  = solver->ptimes + itimes;
	solver->itimes = itimes;
	lis_solver_work_destroy(solver);

	/* t = b - A*xx and its 2-norm; the norm is currently not stored anywhere
	   (it used to be assigned to solver->resid) */
	lis_vector_duplicate(A,&t);
	xx->precision = LIS_PRECISION_DEFAULT;
	lis_matvec(A,xx,t);
	lis_vector_xpay(b,-1.0,t);
	if( scale==LIS_SCALE_SYMM_DIAG && precon_type!=LIS_PRECON_TYPE_IS)
	{
		#ifdef _OPENMP
		#pragma omp parallel for
		#endif
		for(i=0;i<n;i++)
		{
			t->value[i] = t->value[i]/solver->d->value[i];
		}
	}
	lis_vector_nrm2(t,&nrm2);

	if( A->my_rank==0 )
	{
		if( err )
		{
			if( output ) printf("lis_solve : %s(code=%d)\n\n",lis_returncode[err],err);
		}
		else
		{
			if( output ) printf("lis_solve : normal end\n\n");
		}
	}
	if( precision==LIS_PRECISION_DOUBLE )
	{
		solver->iter2 = solver->iter;
	}
	else if( precision==LIS_PRECISION_QUAD )
	{
		solver->iter2 = 0;
	}


	lis_vector_destroy(t);
	lis_vector_destroy(xx);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;

fail:
	/* common exit for failures before the solver was executed */
	lis_vector_destroy(xx);
	lis_free(residual);
	solver->retcode = err;
	return err;
}
LIS_INT lis_input_hb_csr(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file)
{
  char      buf[BUFSIZE];
  char      title[128], key[128], mtx[64], dat[128];
  char      *p;
  char      MXTYPE_F,MXTYPE_S,MXTYPE_T;
  char      RHSTYP_F,RHSTYP_S,RHSTYP_T;
  LIS_INT        TOTCRD,PTRCRD,INDCRD,VALCRD,RHSCRD;
  LIS_INT        NROW,NCOL,NNZERO,NELTVL;
  LIS_INT        NRHS,NRHSIX;
  LIS_INT        iptr,iind,ival,irhs;
  LIS_INT        wptr,wind,wval,wrhs;
  LIS_INT        i,k,j,my_rank;
  LIS_INT        err;
  LIS_INT        n,is,ie;
  LIS_INT        *ptr, *index;
  LIS_INT        matrix_type;
  LIS_SCALAR    *value;
  LIS_MATRIX    B;

  #ifdef USE_MPI
    MPI_Comm_rank(A->comm,&my_rank);
  #else
    my_rank = 0;
  #endif

  matrix_type = A->matrix_type;

  /* Line 1 */
  if( fgets(buf, BUFSIZE, file) == NULL )
  {
    LIS_SETERR_FIO;
    return LIS_ERR_FILE_IO;
  }
  strncpy(title, buf    ,72); title[72] = '\0';
  strncpy(key  ,&buf[72], 8); key[8]    = '\0';
  printf("title: %s\n",title);
  printf("key  : %s\n",key);

  /* Line 2 */
  if( fgets(buf, BUFSIZE, file) == NULL )
  {
    LIS_SETERR_FIO;
    return LIS_ERR_FILE_IO;
  }
#ifdef _LONGLONG
  if( sscanf(buf, "%14lld%14lld%14lld%14lld%14lld", &TOTCRD, &PTRCRD, &INDCRD, &VALCRD, &RHSCRD) != 5 )
#else
  if( sscanf(buf, "%14d%14d%14d%14d%14d", &TOTCRD, &PTRCRD, &INDCRD, &VALCRD, &RHSCRD) != 5 )
#endif
  {
    LIS_SETERR_FIO;
    return LIS_ERR_FILE_IO;
  }
#ifdef _LONGLONG
  printf("%14lld%14lld%14lld%14lld%14lld\n",TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD);
#else
  printf("%14d%14d%14d%14d%14d\n",TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD);
#endif

  /* Line 3 */
  if( fgets(buf, BUFSIZE, file) == NULL )
  {
    LIS_SETERR_FIO;
    return LIS_ERR_FILE_IO;
  }
#ifdef _LONGLONG
  if( sscanf(buf, "%s %lld %lld %lld %lld", mtx, &NROW, &NCOL, &NNZERO, &NELTVL) != 5 )
#else
  if( sscanf(buf, "%s %d %d %d %d", mtx, &NROW, &NCOL, &NNZERO, &NELTVL) != 5 )
#endif
  {
    LIS_SETERR_FIO;
    return LIS_ERR_FILE_IO;
  }
  for(p=mtx;*p!='\0';p++)     *p = (char)tolower(*p);
  MXTYPE_F = mtx[0];
  MXTYPE_S = mtx[1];
  MXTYPE_T = mtx[2];
  if( mtx[0]!='r' )
  {
    LIS_SETERR(LIS_ERR_FILE_IO,"Not real\n");
    return LIS_ERR_FILE_IO;
  }
  /*
  if( mtx[1]!='u' )
  {
    LIS_SETERR(LIS_ERR_FILE_IO,"Not unsymmetric\n");
    return LIS_ERR_FILE_IO;
  }
  */
  if( mtx[2]!='a' )
  {
    LIS_SETERR(LIS_ERR_FILE_IO,"Not assembled\n");
    return LIS_ERR_FILE_IO;
  }
  if( NROW!=NCOL )
  {
    LIS_SETERR(LIS_ERR_FILE_IO,"matrix is not square\n");
    return LIS_ERR_FILE_IO;
  }
#ifdef _LONGLONG
  printf("%c%c%c %lld %lld %lld %lld\n",MXTYPE_F, MXTYPE_S, MXTYPE_T, NROW, NCOL, NNZERO, NELTVL);
#else
  printf("%c%c%c %d %d %d %d\n",MXTYPE_F, MXTYPE_S, MXTYPE_T, NROW, NCOL, NNZERO, NELTVL);
#endif

  /* Line 4 */
  if( fgets(buf, BUFSIZE, file) == NULL )
  {
    LIS_SETERR_FIO;
    return LIS_ERR_FILE_IO;
  }
  lis_input_hb_get_fmt( buf    ,16,&iptr,&wptr);
  lis_input_hb_get_fmt(&buf[16],16,&iind,&wind);
  lis_input_hb_get_fmt(&buf[32],20,&ival,&wval);
  lis_input_hb_get_fmt(&buf[52],20,&irhs,&wrhs);
#ifdef _LONGLONG
  printf("%lld %lld %lld %lld\n",iptr,iind,ival,irhs);
  printf("%lld %lld %lld %lld\n",wptr,wind,wval,wrhs);
#else
  printf("%d %d %d %d\n",iptr,iind,ival,irhs);
  printf("%d %d %d %d\n",wptr,wind,wval,wrhs);
#endif

  /* Line 5 */
  if( RHSCRD!=0 )
  {
    if( fgets(buf, BUFSIZE, file) == NULL )
    {
      LIS_SETERR_FIO;
      return LIS_ERR_FILE_IO;
    }
#ifdef _LONGLONG
    sscanf(buf, "%s %lld %lld", mtx, &NRHS, &NRHSIX);
#else
    sscanf(buf, "%s %d %d", mtx, &NRHS, &NRHSIX);
#endif
/*
#ifdef _LONGLONG
    if( sscanf(buf, "%s %lld %lld", mtx, &NRHS, &NRHSIX) != 3 )
#else
    if( sscanf(buf, "%s %d %d", mtx, &NRHS, &NRHSIX) != 3 )
#endif
    {
      LIS_SETERR_FIO;
      return LIS_ERR_FILE_IO;
    }
*/
    for(p=mtx;*p!='\0';p++)     *p = (char)tolower(*p);
    RHSTYP_F = mtx[0];
    RHSTYP_S = mtx[1];
    RHSTYP_T = mtx[2];
#ifdef _LONGLONG
    printf("%c%c%c %lld %lld\n",RHSTYP_F, RHSTYP_S, RHSTYP_T, NRHS, NRHSIX);
#else
    printf("%c%c%c %d %d\n",RHSTYP_F, RHSTYP_S, RHSTYP_T, NRHS, NRHSIX);
#endif
  }

  err = lis_matrix_set_size(A,0,NROW);
  if( err )
  {
    return err;
  }
  n = A->n;
  lis_matrix_get_range(A,&is,&ie);
  err = lis_matrix_malloc_csr(n,NNZERO,&ptr,&index,&value);
  if( err )
  {
    return err;
  }

  /* read data */
  k = 0;
  for( i=0; i<PTRCRD; i++ )
  {
    if( fgets(buf, BUFSIZE, file) == NULL )
    {
      LIS_SETERR_FIO;
      return LIS_ERR_FILE_IO;
    }
    p = buf;
    for(j=0;j<iptr&&k<n+1;j++)
    {
      strncpy(dat, p, wptr); dat[wptr] = '\0';
      ptr[k] = atoi(dat) - 1;
      p += wptr;
      k++;
    }
  }

  k = 0;
  for( i=0; i<INDCRD; i++ )
  {
    if( fgets(buf, BUFSIZE, file) == NULL )
    {
      LIS_SETERR_FIO;
      return LIS_ERR_FILE_IO;
    }
    p = buf;
    for(j=0;j<iind&&k<NNZERO;j++)
    {
      strncpy(dat, p, wind); dat[wind] = '\0';
      index[k] = atoi(dat) - 1;
      p += wind;
      k++;
    }
  }

  k = 0;
  for( i=0; i<VALCRD; i++ )
  {
    if( fgets(buf, BUFSIZE, file) == NULL )
    {
      LIS_SETERR_FIO;
      return LIS_ERR_FILE_IO;
    }
    p = buf;
    for(j=0;j<ival&&k<NNZERO;j++)
    {
      strncpy(dat, p, wval); dat[wval] = '\0';
      value[k] = atof(dat);
      p += wval;
      k++;
    }
  }

  if( RHSCRD>0 )
  {
    /*
    k = 0;
    for( i=0; i<RHSCRD; i++ )
    {
      if( fgets(buf, BUFSIZE, file) == NULL )
      {
        LIS_SETERR_FIO;
        return LIS_ERR_FILE_IO;
      }
      p = buf;
      for(j=0;j<ival&&k<NNZERO;j++)
      {
        strncpy(dat, p, wval); dat[wval] = '\0';
        value[k] = atof(dat);
        p += wval;
        printf("%e ",value[k]);
        k++;
      }
      printf("\n");
    }
    */
  }
  err = lis_matrix_set_csc(NNZERO,ptr,index,value,A);
  if( err )
  {
    return err;
  }
  err = lis_matrix_assemble(A);
  if( err ) return err;

  if( matrix_type!=LIS_MATRIX_CSC )
  {
    err = lis_matrix_duplicate(A,&B);
    if( err ) return err;
    lis_matrix_set_type(B,LIS_MATRIX_CSR);
    err = lis_matrix_convert_csc2csr(A,B);
    if( err ) return err;
    lis_matrix_storage_destroy(A);
    lis_matrix_DLU_destroy(A);
    lis_matrix_diag_destroy(A->WD);
    if( A->l2g_map ) lis_free( A->l2g_map );
    if( A->commtable ) lis_commtable_destroy( A->commtable );
    if( A->ranges ) lis_free( A->ranges );
    err = lis_matrix_copy_struct(B,A);
    if( err ) return err;
    lis_free(B);
  }

  return LIS_SUCCESS;
}
/* Example #5 (0) */
/*
 * Run the eigensolver configured in `esolver` on matrix A.
 *
 * Sequence: validate A, x and the eigensolver options, allocate the result
 * arrays (eigenvalues, residual norms, iteration counts, eigenvector
 * handles, residual history), build a working copy `xx` of the initial
 * vector, convert A to the requested storage format, apply the diagonal
 * shift, allocate the work vectors, execute the eigensolver, copy the
 * results back and undo the shift.
 *
 * Parameters
 *   A        matrix; may be converted in place to the storage format selected
 *            by LIS_EOPTIONS_STORAGE
 *   x        initial vector on entry (unless LIS_EOPTIONS_INITGUESS_ONES is
 *            set); receives esolver->x on return
 *   evalue0  receives esolver->evalue[0]
 *   esolver  options, parameters and result fields
 *
 * Returns
 *   an error code when something fails before the eigensolver is executed.
 *   Once it has been executed LIS_SUCCESS is returned unconditionally; the
 *   eigensolver's own status is available in esolver->retcode.
 */
LIS_INT lis_esolve(LIS_MATRIX A, LIS_VECTOR x, LIS_SCALAR *evalue0, LIS_ESOLVER esolver)
{
	LIS_INT	nesolver,niesolver,emaxiter;
	LIS_SCALAR *evalue = NULL;
	LIS_VECTOR *evector = NULL;
	LIS_SCALAR *resid = NULL;
	LIS_SCALAR *rhistory = NULL;
	LIS_INT	*iter = NULL,*iter2 = NULL;
	LIS_INT	err;
	LIS_INT output;
	LIS_INT ss, mode;
	double time;
	double gshift;
	LIS_INT	estorage,eblock;
	LIS_MATRIX B;
	LIS_INT eprecision;
	LIS_VECTOR xx = NULL;

	LIS_DEBUG_FUNC_IN;

	/* begin parameter check */
	err = lis_matrix_check(A,LIS_MATRIX_CHECK_ALL);

	if( err ) return err;
	if( x==NULL )
	{
		LIS_SETERR(LIS_ERR_ILL_ARG,"vector x is undefined\n");
		return LIS_ERR_ILL_ARG;
	}
	if( A->n!=x->n )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Size n(=%d) of matrix A and size n(=%d) of vector x are not equal\n",A->n,x->n);
		return LIS_ERR_ILL_ARG;
	}
	if( A->gn<=0 )
	{
		LIS_SETERR1(LIS_ERR_ILL_ARG,"Size n(=%d) of matrix A is less than 0\n",A->gn);
		return LIS_ERR_ILL_ARG;
	}

	nesolver = esolver->options[LIS_EOPTIONS_ESOLVER];
	niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER];
	ss = esolver->options[LIS_EOPTIONS_SUBSPACE];
	mode = esolver->options[LIS_EOPTIONS_MODE];
	emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
	gshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN];
	output = esolver->options[LIS_EOPTIONS_OUTPUT];
	estorage = esolver->options[LIS_EOPTIONS_STORAGE];
	eblock = esolver->options[LIS_EOPTIONS_STORAGE_BLOCK];
	eprecision = esolver->options[LIS_EOPTIONS_PRECISION];
	esolver->eprecision = eprecision;

	if( nesolver < 1 || nesolver > LIS_ESOLVER_LEN )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_ESOLVER is %d (Set between 1 to %d)\n",nesolver, LIS_ESOLVER_LEN);
		return LIS_ERR_ILL_ARG;
	}

	if( niesolver < 1 || niesolver > 7 )
	{
		LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 7)\n", niesolver);
		return LIS_ERR_ILL_ARG;
	}

	if ( nesolver == LIS_ESOLVER_SI && niesolver > 4 )
	{
		LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 4 for Subspace)\n", niesolver);
		return LIS_ERR_ILL_ARG;
	}

	if ( nesolver == LIS_ESOLVER_LI && niesolver == LIS_ESOLVER_PI )
	{
		LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 7 for Lanczos)\n", niesolver);
		return LIS_ERR_ILL_ARG;
	}

	if ( nesolver == LIS_ESOLVER_AI && (( niesolver == LIS_ESOLVER_PI ) || ( niesolver == LIS_ESOLVER_CG) || ( niesolver == LIS_ESOLVER_JD)) )
	{
		LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 4 or 6 for Arnoldi)\n", niesolver);
		return LIS_ERR_ILL_ARG;
	}

	if ( nesolver == LIS_ESOLVER_SI && ss > A->gn )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Subspace)\n", ss, A->gn);
		return LIS_ERR_ILL_ARG;
	}

	if (( nesolver == LIS_ESOLVER_LI || nesolver == LIS_ESOLVER_AI ) && ss > A->gn )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Lanczos and Arnoldi)\n", ss, A->gn);
		return LIS_ERR_ILL_ARG;
	}

	if ( nesolver == LIS_ESOLVER_SI && mode >= ss )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Subspace)\n", mode, ss);
		return LIS_ERR_ILL_ARG;
	}

	/* each solver id is compared separately: `x == (LI || AI)` would compare
	   x against the constant 1 */
	if ( ( nesolver == LIS_ESOLVER_LI || nesolver == LIS_ESOLVER_AI ) && mode >= ss )
	{
		LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Lanczos or Arnoldi)\n", mode, ss);
		return LIS_ERR_ILL_ARG;
	}

	#ifdef USE_QUAD_PRECISION
		if( eprecision==LIS_PRECISION_QUAD && lis_esolver_execute_quad[nesolver]==NULL )
		{
			LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision eigensolver %s is not implemented\n",lis_esolvername[nesolver]);
			return LIS_ERR_NOT_IMPLEMENTED;
		}
		else if( eprecision==LIS_PRECISION_SWITCH && lis_esolver_execute_switch[nesolver]==NULL )
		{
			LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch esolver %s is not implemented\n",lis_esolvername[nesolver]);
			return LIS_ERR_NOT_IMPLEMENTED;
		}
		if( esolver->options[LIS_EOPTIONS_SWITCH_MAXITER]==-1 )
		{
			esolver->options[LIS_EOPTIONS_SWITCH_MAXITER] = emaxiter;
		}
	#endif
	/* end parameter check */

	/*
	 * The result arrays are built in locals and handed to esolver only after
	 * every allocation and the matrix conversion have succeeded.  The old
	 * arrays are released and their fields cleared right away, so a failure
	 * (which leaves through the `fail` label) does not leave esolver with
	 * dangling pointers.
	 */

	/* create eigenvalue array */
	if( esolver->evalue )
	{
		lis_free(esolver->evalue);
		esolver->evalue = NULL;
	}
	evalue = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::evalue");
	if( evalue==NULL )
	{
		LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}
	evalue[0] = 1.0;
	/* guard against ss == 0, which would index element -1 */
	if( ss>0 ) evalue[ss-1] = 1.0;

	/* create residual norm array */
	if( esolver->resid )
	{
		lis_free(esolver->resid);
		esolver->resid = NULL;
	}
	resid = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::resid");
	if( resid==NULL )
	{
		LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}

	/* create number of iterations array */
	if( esolver->iter )
	{
		lis_free(esolver->iter);
		esolver->iter = NULL;
	}
	iter = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_INT),"lis_esolve::iter");
	if( iter==NULL )
	{
		LIS_SETERR_MEM((ss+2)*sizeof(LIS_INT));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}

	/* create quad precision number of iterations array */
	if( esolver->iter2 )
	{
		lis_free(esolver->iter2);
		esolver->iter2 = NULL;
	}
	iter2 = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_INT),"lis_esolve::iter2");
	if( iter2==NULL )
	{
		LIS_SETERR_MEM((ss+2)*sizeof(LIS_INT));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}

	/* create initial vector */
	#ifndef USE_QUAD_PRECISION
		err = lis_vector_duplicate(A,&xx);
	#else
		if( eprecision==LIS_PRECISION_DOUBLE )
		{
			err = lis_vector_duplicate(A,&xx);
		}
		else
		{
			err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx);
		}
	#endif
	if( err )
	{
		/* nothing usable was created; keep the cleanup from touching xx */
		xx = NULL;
		goto fail;
	}
	if( esolver->options[LIS_EOPTIONS_INITGUESS_ONES] )
	{
		if( output ) lis_printf(A->comm,"initial vector x      : 1\n");
		#ifndef USE_QUAD_PRECISION
			lis_vector_set_all(1.0,xx);
		#else
			if( eprecision==LIS_PRECISION_DOUBLE )
			{
				lis_vector_set_all(1.0,xx);
			}
			else
			{
				lis_vector_set_allex_nm(1.0,xx);
			}
		#endif
	}
	else
	{
		if( output ) lis_printf(A->comm,"initial vector x      : user defined\n");
		#ifndef USE_QUAD_PRECISION
			lis_vector_copy(x,xx);
		#else
			if( eprecision==LIS_PRECISION_DOUBLE )
			{
				lis_vector_copy(x,xx);
			}
			else
			{
				lis_vector_copyex_nm(x,xx);
			}
		#endif
	}

	/* global shift */
	if( output && A->my_rank==0 ) printf("shift                 : %e\n", gshift);

	/* create eigenvector array */
	if( esolver->evector )
	{
		lis_free(esolver->evector);
		esolver->evector = NULL;
	}
	evector = (LIS_VECTOR *)lis_malloc((ss+2)*sizeof(LIS_VECTOR),"lis_esolve::evector");
	if( evector==NULL )
	{
		LIS_SETERR_MEM((ss+2)*sizeof(LIS_VECTOR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}

	/* create residual history array */
	if( esolver->rhistory )
	{
		lis_free(esolver->rhistory);
		esolver->rhistory = NULL;
	}
	rhistory = (LIS_SCALAR *)lis_malloc((emaxiter+2)*sizeof(LIS_SCALAR),"lis_esolve::rhistory");
	if( rhistory==NULL )
	{
		LIS_SETERR_MEM((emaxiter+2)*sizeof(LIS_SCALAR));
		err = LIS_OUT_OF_MEMORY;
		goto fail;
	}

	/* convert matrix */
	if( estorage>0 && A->matrix_type!=estorage )
	{
		/* NOTE(review): B is not released on the error paths below */
		err = lis_matrix_duplicate(A,&B);
		if( err ) goto fail;
		lis_matrix_set_blocksize(B,eblock,eblock,NULL,NULL);
		lis_matrix_set_type(B,estorage);
		err = lis_matrix_convert(A,B);
		if( err ) goto fail;
		lis_matrix_storage_destroy(A);
		lis_matrix_DLU_destroy(A);
		lis_matrix_diag_destroy(A->WD);
		if( A->l2g_map ) lis_free( A->l2g_map );
		if( A->commtable ) lis_commtable_destroy( A->commtable );
		if( A->ranges ) lis_free( A->ranges );
		err = lis_matrix_copy_struct(B,A);
		if( err ) goto fail;
		lis_free(B);
	}

	/* hand the arrays over; from here on esolver owns them */
	esolver->A        = A;
	esolver->evalue   = evalue;
	esolver->x        = x;
	esolver->evector  = evector;
	rhistory[0]       = 1.0;
	esolver->rhistory = rhistory;
	esolver->resid    = resid;
	esolver->iter     = iter;
	esolver->iter2    = iter2;

	/* banner (rank 0 only, and only when output is requested) */
	if( output && A->my_rank==0 )
	{
#ifdef _LONG__DOUBLE
		printf("precision             : long double\n");
#else
		printf("precision             : %s\n", lis_eprecisionname[eprecision]);
#endif
		printf("eigensolver           : %s\n", lis_esolvername[nesolver]);

#ifdef _LONG__DOUBLE
		printf("convergence condition : ||lx-Ax||_2 <= %6.1Le * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]);
#else
		printf("convergence condition : ||lx-Ax||_2 <= %6.1e * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]);
#endif

		if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC )
		{
#ifdef _LONG__LONG
			printf("matrix storage format : %s(%lld x %lld)\n", lis_estoragename[A->matrix_type-1],eblock,eblock);
#else
			printf("matrix storage format : %s(%d x %d)\n", lis_estoragename[A->matrix_type-1],eblock,eblock);
#endif
		}
		else
		{
			printf("matrix storage format : %s\n", lis_estoragename[A->matrix_type-1]);
		}
	}

	time = lis_wtime();

	esolver->ptime = 0;
	esolver->itime = 0;
	esolver->p_c_time = 0;
	esolver->p_i_time = 0;


	if (gshift != 0.0) lis_matrix_shift_diagonal(A, gshift);

	/* create work vector */
	err = lis_esolver_malloc_work[nesolver](esolver);
	if( err )
	{
		/* give the caller's matrix back unshifted */
		if (gshift != 0.0) lis_matrix_shift_diagonal(A, -gshift);
		lis_vector_destroy(xx);
		esolver->retcode = err;
		return err;
	}

	/* the eigensolver iterates on the working copy; xx keeps the caller's vector */
	esolver->x        = xx;
	esolver->xx       = x;

	/* execute esolver */
	#ifndef USE_QUAD_PRECISION
		err = lis_esolver_execute[nesolver](esolver);
	#else
		if( eprecision==LIS_PRECISION_DOUBLE )
		{
			err = lis_esolver_execute[nesolver](esolver);
		}
		else if( eprecision==LIS_PRECISION_QUAD )
		{
			err = lis_esolver_execute_quad[nesolver](esolver);
		}
		else if( eprecision==LIS_PRECISION_SWITCH )
		{
			err = lis_esolver_execute_switch[nesolver](esolver);
		}
	#endif
	esolver->retcode = err;

	*evalue0 = esolver->evalue[0];
	lis_vector_copy(esolver->x, x);

	esolver->time = lis_wtime() - time;

	/* undo the diagonal shift */
	lis_matrix_shift_diagonal(A, -gshift);

	if( output && A->my_rank==0 )
	{
		if( err )
		{
#ifdef _LONG__LONG
			printf("eigensolver status    : %s(code=%lld)\n\n",lis_ereturncode[err],err);
#else
			printf("eigensolver status    : %s(code=%d)\n\n",lis_ereturncode[err],err);
#endif
		}
		else
		{
			printf("eigensolver status    : normal end\n\n");
		}
	}

	if( eprecision==LIS_PRECISION_DOUBLE )
	{
		esolver->iter2[mode] = esolver->iter[mode];
	}
	else if( eprecision==LIS_PRECISION_QUAD )
	{
		esolver->iter2[mode] = 0;
	}

	lis_vector_destroy(xx);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;

fail:
	/* common exit for failures before the arrays were handed to esolver */
	if( xx ) lis_vector_destroy(xx);
	if( rhistory ) lis_free(rhistory);
	if( evector ) lis_free(evector);
	if( iter2 ) lis_free(iter2);
	if( iter ) lis_free(iter);
	if( resid ) lis_free(resid);
	if( evalue ) lis_free(evalue);
	esolver->retcode = err;
	return err;
}