LIS_INT lis_finalize(void)
{
  LIS_DEBUG_FUNC_IN;

  lis_precon_register_free();
  /*
  if( cmd_args ) lis_args_free(cmd_args);
  */
  if( cmd_args ) 
    {
      lis_args_free(cmd_args); 
      cmd_args = NULL;
    }
  #ifdef _OPENMP
    lis_free(lis_vec_tmp);
  #endif
  #ifdef USE_QUAD_PRECISION
    lis_free(lis_quad_scalar_tmp);
    lis_quad_x87_fpu_finalize(lis_x87_fpu_cw);
  #endif

  lis_free_all();
  
  #ifdef USE_MPI
    if (!lis_mpi_initialized) MPI_Finalize();
  #endif

  LIS_DEBUG_FUNC_OUT;

  return LIS_SUCCESS;
}
Exemple #2
0
LIS_INT lis_vector_destroy(LIS_VECTOR vec)
{
	LIS_DEBUG_FUNC_IN;
	if( lis_is_malloc(vec) )
	{
		if( vec->value && vec->is_destroy ) lis_free( vec->value );
		if( vec->work ) lis_free( vec->work );
		if( vec->ranges ) lis_free( vec->ranges );
		if( vec ) lis_free(vec);
	}
	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_solver_destroy(LIS_SOLVER solver)
{
	LIS_DEBUG_FUNC_IN;

	if( solver )
	{
		lis_solver_work_destroy(solver);
		lis_vector_destroy(solver->d);
		if( solver->At ) lis_matrix_destroy(solver->At);
		if( solver->residual ) lis_free(solver->residual);
		lis_free(solver);
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #4
0
LIS_INT lis_esolver_destroy(LIS_ESOLVER esolver)
{
	LIS_INT i,ss;
  
	LIS_DEBUG_FUNC_IN;

	if( esolver )
	{
		lis_esolver_work_destroy(esolver);
   	        if( esolver->rhistory ) lis_free(esolver->rhistory);
	        if( esolver->evalue ) lis_free(esolver->evalue);
 	        if( esolver->resid ) lis_free(esolver->resid);
 	        if( esolver->iter) lis_free(esolver->iter);
 	        if( esolver->iter2) lis_free(esolver->iter2);
   	        if( esolver->evector ) 
		  {
		    if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_LI || esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_AI || esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI )
		      {
			ss=esolver->options[LIS_EOPTIONS_SUBSPACE];
			for(i=0;i<ss+2;i++) lis_vector_destroy(esolver->evector[i]);
		      }
		    lis_free(esolver->evector);
		  }
		lis_free(esolver);
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_matrix_setDLU_csr(LIS_INT nnzl, LIS_INT nnzu, LIS_SCALAR *diag, LIS_INT *lptr, LIS_INT *lindex, LIS_SCALAR *lvalue,
              LIS_INT *uptr, LIS_INT *uindex, LIS_SCALAR *uvalue, LIS_MATRIX A)
{
  LIS_INT        err;
  LIS_MATRIX_DIAG  D;

  LIS_DEBUG_FUNC_IN;

#if 0
  err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
  if( err ) return err;
#else
  if(lis_matrix_is_assembled(A))  return LIS_SUCCESS;
  else {
    err = lis_matrix_check(A,LIS_MATRIX_CHECK_SET);
    if( err ) return err;
  }
#endif

  A->L = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT), "lis_matrix_setDLU_csr::A->L");
  if( A->L==NULL )
  {
    LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
    return LIS_OUT_OF_MEMORY;
  }
  A->U = (LIS_MATRIX_CORE)lis_calloc(sizeof(struct LIS_MATRIX_CORE_STRUCT), "lis_matrix_setDLU_csr::A->U");
  if( A->U==NULL )
  {
    LIS_SETERR_MEM(sizeof(struct LIS_MATRIX_CORE_STRUCT));
    lis_matrix_DLU_destroy(A);
    return LIS_OUT_OF_MEMORY;
  }
  err = lis_matrix_diag_create(A->n,0,A->comm,&D);
  if( err )
  {
    lis_matrix_DLU_destroy(A);
    return err;
  }

  lis_free(D->value);
  D->value       = diag;
  A->D           = D;
  A->L->nnz      = nnzl;
  A->L->ptr      = lptr;
  A->L->index    = lindex;
  A->L->value    = lvalue;
  A->U->nnz      = nnzu;
  A->U->ptr      = uptr;
  A->U->index    = uindex;
  A->U->value    = uvalue;
  A->is_copy     = LIS_FALSE;
  A->status      = -LIS_MATRIX_CSR;
  A->is_splited  = LIS_TRUE;

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
void lis_set_argv_end_f(LIS_INT *ierr)
{
  LIS_INT  i;

  LIS_DEBUG_FUNC_IN;

  for(i=0;i<f_argc_tmp;i++)
  {
    lis_free(f_argv_tmp[i]);
  }
  lis_free(f_argv_tmp);
  f_argv_tmp = NULL;
  f_argc_tmp = 0;
  *ierr = LIS_SUCCESS;

  LIS_DEBUG_FUNC_OUT;
  return;
}
Exemple #7
0
LIS_INT lis_quad_free(LIS_QUAD_PTR *a)
{
	LIS_DEBUG_FUNC_IN;

	lis_free(a->hi);

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_input_hb(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file)
{
  LIS_INT      err;
  LIS_INT      matrix_type;
  LIS_MATRIX  B;

  LIS_DEBUG_FUNC_IN;

  matrix_type = A->matrix_type;

  err = lis_input_hb_csr(A,b,x,file);
  if( err ) return err;

  if( matrix_type!=LIS_MATRIX_CSR && matrix_type!=LIS_MATRIX_CSC )
  {
    err = lis_matrix_duplicate(A,&B);
    if( err ) return err;
    lis_matrix_set_type(B,matrix_type);
    err = lis_matrix_convert(A,B);
    if( err ) return err;
    lis_matrix_storage_destroy(A);
    lis_matrix_DLU_destroy(A);
    lis_matrix_diag_destroy(A->WD);
    if( A->l2g_map ) lis_free( A->l2g_map );
    if( A->commtable ) lis_commtable_destroy( A->commtable );
    if( A->ranges ) lis_free( A->ranges );
    err = lis_matrix_copy_struct(B,A);
    if( err ) return err;
    lis_free(B);
    if( A->matrix_type==LIS_MATRIX_JAD )
    {
      A->work = (LIS_SCALAR *)lis_malloc(A->n*sizeof(LIS_SCALAR),"lis_input_hb::A->work");
      if( A->work==NULL )
      {
        LIS_SETERR_MEM(A->n*sizeof(LIS_SCALAR));
        return LIS_OUT_OF_MEMORY;
      }
    }
  }


  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
Exemple #9
0
LIS_INT lis_precon_create_adds(LIS_SOLVER solver, LIS_PRECON precon)
{
	LIS_INT	i,j;
	LIS_INT	precon_type,worklen;
	LIS_INT	err;
	LIS_VECTOR *work;

	LIS_DEBUG_FUNC_IN;

	precon_type = solver->options[LIS_OPTIONS_PRECON];
	worklen     = 2;
	work        = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_precon_create_adds::work" );
	if( work==NULL )
	{
		LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
		return LIS_OUT_OF_MEMORY;
	}
	if( solver->precision==LIS_PRECISION_DEFAULT )
	{
		for(i=0;i<worklen;i++)
		{
			err = lis_vector_duplicate(solver->A,&work[i]);
			if( err ) break;
		}
	}
	else
	{
		for(i=0;i<worklen;i++)
		{
			err = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&work[i]);
			if( err ) break;
		}
	}
	if( i<worklen )
	{
		for(j=0;j<i;j++) lis_vector_destroy(work[j]);
		lis_free(work);
		return err;
	}
	precon->worklen = worklen;
	precon->work    = work;

	err = lis_precon_create_xxx[precon_type](solver,precon);
	if( err )
	{
		lis_precon_destroy(precon);
		return err;
	}

    precon->A       = solver->A;
    precon->is_copy = LIS_FALSE;

	LIS_DEBUG_FUNC_OUT;
	return err;
}
Exemple #10
0
LIS_INT lis_precon_destroy(LIS_PRECON precon)
{
	LIS_INT i;

	LIS_DEBUG_FUNC_IN;

	if( precon )
	{
		if( precon->is_copy ) lis_matrix_destroy(precon->A);
		lis_vector_destroy(precon->Pb);
		lis_vector_destroy(precon->D);
		lis_vector_destroy(precon->temp);
		lis_matrix_ilu_destroy(precon->L);
		lis_matrix_ilu_destroy(precon->U);
		lis_matrix_diag_destroy(precon->WD);
		if( precon->solver )
		{
			lis_vector_destroy(precon->solver->x);
			lis_precon_destroy(precon->solver->precon);
			lis_solver_destroy(precon->solver);
		}
#if defined(USE_SAAMG)
		lis_commtable_destroy(precon->commtable);
		if( precon->precon_type==LIS_PRECON_TYPE_SAAMG )
		{
		 (*(void (*)())f_clear_matrix_ptr)(&precon->level_num);
		}
#endif
		if( precon->work    )
		{
			for(i=0;i<precon->worklen;i++)
			{
				lis_vector_destroy(precon->work[i]);
			}
			lis_free(precon->work);
		}
		lis_free(precon);
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #11
0
LIS_INT lis_gmres_malloc_work(LIS_SOLVER solver)
{
	LIS_VECTOR *work;
	LIS_INT	i,j,restart,worklen,err;

	LIS_DEBUG_FUNC_IN;

	restart = solver->options[LIS_OPTIONS_RESTART];
	worklen = NWORK + (restart+1);
	work    = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_gmres_malloc_work::work" );
	if( work==NULL )
	{
		LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
		return LIS_ERR_OUT_OF_MEMORY;
	}
	if( solver->precision==LIS_PRECISION_DEFAULT )
	{
		for(i=1;i<worklen;i++)
		{
			err = lis_vector_duplicate(solver->A,&work[i]);
			if( err ) break;
		}
	}
	else
	{
		for(i=1;i<worklen;i++)
		{
			err = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&work[i]);
			if( err ) break;
			memset(work[i]->value_lo,0,solver->A->np*sizeof(LIS_SCALAR));
		}
	}
	if( i<worklen )
	{
		for(j=1;j<i;j++) lis_vector_destroy(work[j]);
		lis_free(work);
		return err;
	}
	if( solver->precision==LIS_PRECISION_DEFAULT )
	{
		lis_vector_create(solver->A->comm,&work[0]);
	}
	else
	{
		lis_vector_createex(LIS_PRECISION_QUAD,solver->A->comm,&work[0]);
	}
	lis_vector_set_size(work[0],restart+1,0);
	solver->worklen = worklen;
	solver->work    = work;


	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_idrs_malloc_work(LIS_SOLVER solver)
{
	LIS_VECTOR	*work;
	LIS_INT			i,j,s,worklen,err;

	LIS_DEBUG_FUNC_IN;

	/*
	err = lis_matrix_convert(solver->A,&solver->At,LIS_MATRIX_CCS);
	if( err ) return err;
	*/

	s       = solver->options[LIS_OPTIONS_IDRS_RESTART];
	worklen = NWORK + 3*s;
	work    = (LIS_VECTOR *)lis_malloc(
worklen*sizeof(LIS_VECTOR),"lis_idrs_malloc_work::work" );
	if( work==NULL )
	{
		LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
		return LIS_ERR_OUT_OF_MEMORY;
	}
	if( solver->precision==LIS_PRECISION_DEFAULT )
	{
		for(i=0;i<worklen;i++)
		{
			err = lis_vector_duplicate(solver->A,&work[i]);
			if( err ) break;
		}
	}
	else
	{
		for(i=0;i<worklen;i++)
		{
			err =
lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&work[i]);
			if( err ) break;

memset(work[i]->value_lo,0,solver->A->np*sizeof(LIS_SCALAR));
		}
	}
	if( i<worklen )
	{
		for(j=0;j<i;j++) lis_vector_destroy(work[j]);
		lis_free(work);
		return err;
	}
	solver->worklen = worklen;
	solver->work    = work;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_args_free(LIS_ARGS args)
{
  LIS_ARGS arg,t;

  LIS_DEBUG_FUNC_IN;

  arg = args->next;
  
  while( arg!=args )
  {
    t             = arg;
    arg           = arg->next;

    lis_free2(2,t->arg1,t->arg2);
    t->next->prev = t->prev;
    t->prev->next = t->next;
    lis_free(t);
  }
  if (args) lis_free(args);

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
Exemple #14
0
LIS_INT lis_precon_register_free(void)
{

	LIS_DEBUG_FUNC_IN;

	if( precon_register_top )
	{
		lis_free(precon_register_top);
		precon_register_top = NULL;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #15
0
LIS_INT lis_vector_unset(LIS_VECTOR vec)
{
	LIS_INT	err;

	LIS_DEBUG_FUNC_IN;

	err = lis_vector_check(vec,LIS_VECTOR_CHECK_NULL);
	if( err ) return err;

	if( vec->is_copy ) lis_free(vec->value);
	vec->value  = NULL;
	vec->status = LIS_VECTOR_NULL;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #16
0
LIS_INT lis_esolver_work_destroy(LIS_ESOLVER esolver)
{
	LIS_INT i;

	LIS_DEBUG_FUNC_IN;

	if( esolver && esolver->work )
	{
		for(i=0;i<esolver->worklen;i++) lis_vector_destroy(esolver->work[i]);
		lis_free(esolver->work);
		esolver->work    = NULL;
		esolver->worklen = 0;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_solver_work_destroy(LIS_SOLVER solver)
{
	LIS_INT i;

	LIS_DEBUG_FUNC_IN;

	if( solver && solver->work )
	{
		for(i=0;i<solver->worklen;i++) lis_vector_destroy(solver->work[i]);
		lis_free(solver->work);
		solver->work    = NULL;
		solver->worklen = 0;
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #18
0
LIS_INT lis_vector_set(LIS_VECTOR vec, LIS_SCALAR *value)
{
	LIS_INT	err;

	LIS_DEBUG_FUNC_IN;

	err = lis_vector_check(vec,LIS_VECTOR_CHECK_NULL);
	if( err ) return err;

	if( vec->is_destroy ) lis_free(vec->value);
	vec->value   = value;
	vec->is_copy = LIS_FALSE;

	vec->status = LIS_VECTOR_ASSEMBLING;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_esi_malloc_work(LIS_ESOLVER esolver)
{
	LIS_VECTOR	*work;
	LIS_INT			i,j,worklen,err,ss;

	LIS_DEBUG_FUNC_IN;

	ss = esolver->options[LIS_EOPTIONS_SUBSPACE];

	worklen = NWORK + ss;
	work    = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_esi_malloc_work::work" );
	if( work==NULL )
	{
		LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
		return LIS_ERR_OUT_OF_MEMORY;
	}
	if( esolver->eprecision==LIS_PRECISION_DEFAULT )
	{
		for(i=0;i<worklen;i++)
		{
			err = lis_vector_duplicate(esolver->A,&work[i]);
			if( err ) break;
		}
	}
	else
	{
		for(i=0;i<worklen;i++)
		{
			err = lis_vector_duplicateex(LIS_PRECISION_QUAD,esolver->A,&work[i]);
			if( err ) break;
		}
	}
	if( i<worklen )
	{
		for(j=0;j<i;j++) lis_vector_destroy(work[j]);
		lis_free(work);
		return err;
	}
	esolver->worklen = worklen;
	esolver->work    = work;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_cgs_malloc_work(LIS_SOLVER solver)
{
	LIS_VECTOR	*work;
	LIS_INT			i,j,worklen,err;

	LIS_DEBUG_FUNC_IN;

	worklen = NWORK;
	work    = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_cgs_malloc_work::work" );
	if( work==NULL )
	{
		LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
		return LIS_ERR_OUT_OF_MEMORY;
	}
	if( solver->precision==LIS_PRECISION_DEFAULT )
	{
		for(i=0;i<worklen;i++)
		{
			err = lis_vector_duplicate(solver->A,&work[i]);
			if( err ) break;
		}
	}
	else
	{
		for(i=0;i<worklen;i++)
		{
			err = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&work[i]);
			if( err ) break;
			memset(work[i]->value_lo,0,solver->A->np*sizeof(LIS_SCALAR));
		}
	}
	if( i<worklen )
	{
		for(j=0;j<i;j++) lis_vector_destroy(work[j]);
		lis_free(work);
		return err;
	}
	solver->worklen = worklen;
	solver->work    = work;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
LIS_INT lis_matrix_ilu_destroy(LIS_MATRIX_ILU A)
{
	LIS_INT i,j;

	LIS_DEBUG_FUNC_IN;

	if( lis_is_malloc(A) )
	{
		if( A->bsz )
		{
			for(i=0;i<A->n;i++)
			{
				free(A->index[i]);
				for(j=0;j<A->nnz[i];j++)
				{
					free(A->values[i][j]);
				}
				if( A->nnz[i]>0 ) free(A->values[i]);
			}
			lis_free2(5,A->bsz,A->nnz,A->index,A->values,A->nnz_ma);
		}
		else
		{
			for(i=0;i<A->n;i++)
			{
				if( A->nnz[i]>0 )
				{
					free(A->index[i]);
					free(A->value[i]);
				}
			}
			lis_free2(4,A->nnz,A->index,A->value,A->nnz_ma);
		}
		lis_free(A);
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #22
0
LIS_INT lis_matrix_split2_csr(LIS_MATRIX A)
{
	LIS_INT i,j,n;
	LIS_INT nnzl,nnzu;
	LIS_INT err;
	LIS_INT *lptr,*lindex,*uptr,*uindex;
	LIS_SCALAR *lvalue,*uvalue;
	#ifdef _OPENMP
		LIS_INT kl,ku;
		LIS_INT *liw,*uiw;
	#endif

	LIS_DEBUG_FUNC_IN;

	n        = A->n;
	nnzl     = 0;
	nnzu     = 0;
	lptr     = NULL;
	lindex   = NULL;
	lvalue   = NULL;
	uptr     = NULL;
	uindex   = NULL;
	uvalue   = NULL;

	#ifdef _OPENMP
		liw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split2_csr::liw");
		if( liw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			return LIS_OUT_OF_MEMORY;
		}
		uiw = (LIS_INT *)lis_malloc((n+1)*sizeof(LIS_INT),"lis_matrix_split2_csr::uiw");
		if( uiw==NULL )
		{
			LIS_SETERR_MEM((n+1)*sizeof(LIS_INT));
			lis_free(liw);
			return LIS_OUT_OF_MEMORY;
		}
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			liw[i] = 0;
			uiw[i] = 0;
		}
		#pragma omp parallel for private(i,j)
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					liw[i+1]++;
				}
				else
				{
					uiw[i+1]++;
				}
			}
		}
		for(i=0;i<n;i++)
		{
			liw[i+1] += liw[i];
			uiw[i+1] += uiw[i];
		}
		nnzl = liw[n];
		nnzu = uiw[n];
	#else
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					nnzl++;
				}
				else
				{
					nnzu++;
				}
			}
		}
	#endif

	err = lis_matrix_LU_create(A);
	if( err )
	{
		return err;
	}
	err = lis_matrix_malloc_csr(n,nnzl,&lptr,&lindex,&lvalue);
	if( err )
	{
		return err;
	}
	err = lis_matrix_malloc_csr(n,nnzu,&uptr,&uindex,&uvalue);
	if( err )
	{
		lis_free2(6,lptr,lindex,lvalue,uptr,uindex,uvalue);
		return err;
	}

	#ifdef _OPENMP
		#pragma omp parallel for private(i)
		for(i=0;i<n+1;i++)
		{
			lptr[i] = liw[i];
			uptr[i] = uiw[i];
		}
		#pragma omp parallel for private(i,j,kl,ku)
		for(i=0;i<n;i++)
		{
			kl = lptr[i];
			ku = uptr[i];
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					lindex[kl]   = A->index[j];
					lvalue[kl]   = A->value[j];
					kl++;
				}
				else
				{
					uindex[ku]   = A->index[j];
					uvalue[ku]   = A->value[j];
					ku++;
				}
			}
		}
		lis_free2(2,liw,uiw);
	#else
		nnzl = 0;
		nnzu = 0;
		lptr[0] = 0;
		uptr[0] = 0;
		for(i=0;i<n;i++)
		{
			for(j=A->ptr[i];j<A->ptr[i+1];j++)
			{
				if( A->index[j]<n )
				{
					lindex[nnzl]   = A->index[j];
					lvalue[nnzl]   = A->value[j];
					nnzl++;
				}
				else
				{
					uindex[nnzu]   = A->index[j];
					uvalue[nnzu]   = A->value[j];
					nnzu++;
				}
			}
			lptr[i+1] = nnzl;
			uptr[i+1] = nnzu;
		}
	#endif
	A->L->nnz     = nnzl;
	A->L->ptr     = lptr;
	A->L->index   = lindex;
	A->L->value   = lvalue;
	A->U->nnz     = nnzu;
	A->U->ptr     = uptr;
	A->U->index   = uindex;
	A->U->value   = uvalue;
	A->is_splited = LIS_TRUE;

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #23
0
LIS_INT lis_matrix_copyDLU_csr(LIS_MATRIX Ain, LIS_MATRIX_DIAG *D, LIS_MATRIX *L, LIS_MATRIX *U)
{
	LIS_INT err;
	LIS_INT i,n,np,lnnz,unnz;
	LIS_INT *lptr,*lindex;
	LIS_INT *uptr,*uindex;
	LIS_SCALAR *lvalue,*uvalue,*diag;

	LIS_DEBUG_FUNC_IN;
	
	*D = NULL;
	*L = NULL;
	*U = NULL;

	err = lis_matrix_check(Ain,LIS_MATRIX_CHECK_ALL);
	if( err ) return err;

	n       = Ain->n;
	np      = Ain->np;

	err = lis_matrix_duplicate(Ain,L);
	if( err )
	{
		return err;
	}
	err = lis_matrix_duplicate(Ain,U);
	if( err )
	{
		lis_matrix_destroy(*L);
		return err;
	}
	err = lis_matrix_diag_duplicateM(Ain,D);
	if( err )
	{
		lis_matrix_destroy(*L);
		lis_matrix_destroy(*U);
		return err;
	}
	lis_free((*D)->value);

	if( Ain->is_splited )
	{
	}
	lnnz     = Ain->L->nnz;
	unnz     = Ain->U->nnz;
	lptr     = NULL;
	lindex   = NULL;
	uptr     = NULL;
	uindex   = NULL;
	diag     = NULL;

	err = lis_matrix_malloc_csr(n,lnnz,&lptr,&lindex,&lvalue);
	if( err )
	{
		return err;
	}
	err = lis_matrix_malloc_csr(n,unnz,&uptr,&uindex,&uvalue);
	if( err )
	{
		lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
		return err;
	}
	diag = (LIS_SCALAR *)lis_malloc(np*sizeof(LIS_SCALAR),"lis_matrix_copyDLU_csr::diag");
	if( diag==NULL )
	{
		lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
		return err;
	}

	#ifdef _OPENMP
	#pragma omp parallel for private(i)
	#endif
	for(i=0;i<n;i++)
	{
		diag[i] = Ain->D->value[i];
	}
	lis_matrix_elements_copy_csr(n,Ain->L->ptr,Ain->L->index,Ain->L->value,lptr,lindex,lvalue);
	lis_matrix_elements_copy_csr(n,Ain->U->ptr,Ain->U->index,Ain->U->value,uptr,uindex,uvalue);

	(*D)->value = diag;
	err = lis_matrix_set_csr(lnnz,lptr,lindex,lvalue,*L);
	if( err )
	{
		lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
		return err;
	}
	err = lis_matrix_set_csr(unnz,uptr,uindex,uvalue,*U);
	if( err )
	{
		lis_free2(7,diag,uptr,lptr,uindex,lindex,uvalue,lvalue);
		return err;
	}

	err = lis_matrix_assemble(*L);
	if( err )
	{
		return err;
	}
	err = lis_matrix_assemble(*U);
	if( err )
	{
		return err;
	}
	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #24
0
void lis_matvect_ell(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
{
	LIS_INT i,j,jj;
	LIS_INT n,np,maxnzr;
	#ifdef _OPENMP
		LIS_INT k,is,ie,nprocs;
		LIS_SCALAR t;
		LIS_SCALAR *w;
	#endif

	n      = A->n;
	np     = A->np;
	if( A->is_splited )
	{
		#ifdef USE_VEC_COMP
		#pragma cdir nodep
		#endif
		for(i=0; i<n; i++)
		{
			y[i] = A->D->value[i]*x[i];
		}
		for(j=0;j<A->L->maxnzr;j++)
		{
			jj = j*n;
			#ifdef USE_VEC_COMP
			#pragma cdir nodep
			#endif
			for(i=0;i<n;i++)
			{
				y[A->L->index[jj + i]] += A->L->value[jj + i] * x[i];
			}
		}
		for(j=0;j<A->U->maxnzr;j++)
		{
			jj = j*n;
			#ifdef USE_VEC_COMP
			#pragma cdir nodep
			#endif
			for(i=0;i<n;i++)
			{
				y[A->U->index[jj + i]] += A->U->value[jj + i] * x[i];
			}
		}
	}
	else
	{
		#ifdef _OPENMP
			maxnzr = A->maxnzr;
			nprocs = omp_get_max_threads();
			w = (LIS_SCALAR *)lis_malloc( nprocs*np*sizeof(LIS_SCALAR),"lis_matvect_ell::w" );
			#pragma omp parallel private(i,j,t,jj,k,is,ie)
			{
				k = omp_get_thread_num();
				LIS_GET_ISIE(k,nprocs,n,is,ie);
				#pragma omp for
				for(j=0;j<nprocs;j++)
				{
					memset( &w[j*np], 0, np*sizeof(LIS_SCALAR) );
				}
				for(j=0;j<maxnzr;j++)
				{
					jj = j*n;
					#ifdef USE_VEC_COMP
					#pragma cdir nodep
					#endif
					for(i=is;i<ie;i++)
					{
						w[k*np + A->index[jj + i]] += A->value[jj + i] * x[i];
					}
				}
				#pragma omp barrier
				#pragma omp for 
				#ifdef USE_VEC_COMP
				#pragma cdir nodep
				#endif
				for(i=0;i<np;i++)
				{
					t = 0.0;
					for(j=0;j<nprocs;j++)
					{
						t += w[j*np+i];
					}
					y[i] = t;
				}
			}
			lis_free(w);
		#else
			maxnzr = A->maxnzr;
			#ifdef USE_VEC_COMP
			#pragma cdir nodep
			#endif
			for(i=0; i<n; i++)
			{
				y[i] = 0.0;
			}
			for(j=0;j<maxnzr;j++)
			{
				jj = j*n;
				#ifdef USE_VEC_COMP
				#pragma cdir nodep
				#endif
				for(i=0;i<n;i++)
				{
					y[A->index[jj + i]] += A->value[jj + i] * x[i];
				}
			}
		#endif
	}
}
LIS_INT lis_matvec_ilu(LIS_MATRIX A, LIS_MATRIX_ILU LU, LIS_VECTOR X, LIS_VECTOR Y)
{
	LIS_INT i,j,jj,n,np;
	LIS_SCALAR *x,*y;
	#ifdef _OPENMP
		LIS_INT nprocs,k;
		LIS_SCALAR t,*w;
	#endif
	#ifdef USE_QUAD_PRECISION
		LIS_INT j0,j1;
		#ifdef _OPENMP
				LIS_SCALAR *ww,*wwl;
		#endif
	#endif
	LIS_QUAD_DECLAR;

	LIS_DEBUG_FUNC_IN;

	np = A->np;
	n  = LU->n;
	x  = X->value;
	y  = Y->value;

	#ifdef USE_QUAD_PRECISION
	if( X->precision==LIS_PRECISION_DEFAULT )
	#endif
	{
		#ifdef USE_MPI
			LIS_MATVEC_SENDRECV;
		#endif
		#ifdef _OPENMP
			nprocs = omp_get_max_threads();
			w = (LIS_SCALAR *)lis_malloc( nprocs*np*sizeof(LIS_SCALAR),"lis_matvect_crs::w" );
			#pragma omp parallel private(i,j,k,jj,t)
			{
				k = omp_get_thread_num();
				#pragma omp for
				for(j=0;j<nprocs;j++)
				{
					memset( &w[j*np], 0, np*sizeof(LIS_SCALAR) );
				}
				#pragma omp for 
				for(i=0;i<n;i++)
				{
					for(j=0;j<LU->nnz[i];j++)
					{
						jj = k*np + LU->index[i][j];
						w[jj] += LU->value[i][j] * X->value[i];
					}
				}
				#pragma omp for 
				for(i=0;i<np;i++)
				{
					t = 0.0;
					for(j=0;j<nprocs;j++)
					{
						t += w[j*np+i];
					}
					Y->value[i] = t;
				}
			}
			lis_free(w);
		#else
			for(i=0;i<np;i++)
			{
				Y->value[i] = 0.0;
			}
			for(i=0;i<n;i++)
			{
				for(j=0;j<LU->nnz[i];j++)
				{
					jj = LU->index[i][j];
					Y->value[jj] += LU->value[i][j] * X->value[i];
				}
			}
		#endif
	}
	#ifdef USE_QUAD_PRECISION
	else
	{
		#ifdef USE_MPI
			lis_send_recv_mp(A->commtable,X);
		#endif
		#ifdef _OPENMP
			#ifndef USE_FMA2_SSE2
				nprocs = omp_get_max_threads();
				ww  = (LIS_SCALAR *)lis_malloc( 2*nprocs*np*sizeof(LIS_SCALAR),"lis_matvect_crs_mp::ww" );
				wwl = &ww[nprocs*np];
				#ifndef USE_SSE2
					#pragma omp parallel private(i,j,jj,k,p1,p2,tq,bhi,blo,chi,clo,sh,sl,th,tl,eh,el)
				#else
					#pragma omp parallel private(i,j,jj,k,bh,ch,sh,wh,th,bl,cl,sl,wl,tl,p1,p2,t0,t1,t2,eh)
				#endif
				{
					k = omp_get_thread_num();
					#pragma omp for
					for(j=0;j<nprocs;j++)
					{
						memset( &ww[j*np], 0, np*sizeof(LIS_SCALAR) );
						memset( &wwl[j*np], 0, np*sizeof(LIS_SCALAR) );
					}
					#pragma omp for 
					for(i=0;i<n;i++)
					{
						for(j=0;j<LU->nnz[i];j++)
						{
							jj  = k*np + LU->index[i][j];
							#ifndef USE_SSE2
								LIS_QUAD_FMAD(ww[jj],wwl[jj],ww[jj],wwl[jj],X->value[i],X->value_lo[i],LU->value[i][j]);
							#else
								LIS_QUAD_FMAD_SSE2(ww[jj],wwl[jj],ww[jj],wwl[jj],X->value[i],X->value_lo[i],LU->value[i][j]);
							#endif
						}
					}
					#pragma omp for 
					for(i=0;i<np;i++)
					{
						Y->value[i] = Y->value_lo[i] = 0.0;
						for(j=0;j<nprocs;j++)
						{
							#ifndef USE_SSE2
								LIS_QUAD_ADD(Y->value[i],Y->value_lo[i],Y->value[i],Y->value_lo[i],ww[j*np+i],wwl[j*np+i]);
							#else
								LIS_QUAD_ADD_SSE2(Y->value[i],Y->value_lo[i],Y->value[i],Y->value_lo[i],ww[j*np+i],wwl[j*np+i]);
							#endif
						}
					}
				}
				lis_free(ww);
			#else
				nprocs = omp_get_max_threads();
				ww  = (LIS_SCALAR *)lis_malloc( 2*nprocs*np*sizeof(LIS_SCALAR), "lis_matvect_crs_mp2::ww" );
				wwl = &ww[nprocs*np];
				#pragma omp parallel private(i,j,j0,j1,k,bh,ch,sh,wh,th,bl,cl,sl,wl,tl,p1,p2,t0,t1,t2,eh)
				{
					k = omp_get_thread_num();
					#pragma omp for
					for(j=0;j<nprocs;j++)
					{
						memset( &ww[j*np], 0, np*sizeof(LIS_SCALAR) );
						memset( &wwl[j*np], 0, np*sizeof(LIS_SCALAR) );
					}
					#pragma omp for
					for(i=0; i<n; i++)
					{
						for(j=0;j<LU->nnz[i]-1;j+=2)
						{
							j0  = k*np + LU->index[i][j];
							j1  = k*np + LU->index[i][j+1];
							#ifdef USE_SSE2
								LIS_QUAD_FMAD2_SSE2_STSD(ww[j0],wwl[j0],ww[j1],wwl[j1],ww[j0],wwl[j0],ww[j1],wwl[j1],X->value[i],X->value_lo[i],X->value[i],X->value_lo[i],LU->value[i][j]);
							#endif
						}
						for(;j<LU->nnz[i];j++)
						{
							j0  = LU->index[i][j];
							#ifdef USE_SSE2
								LIS_QUAD_FMAD_SSE2(ww[j0],wwl[j0],ww[j0],wwl[j0],X->value[i],X->value_lo[i],LU->value[i][j]);
							#endif
						}
					}
					#pragma omp for 
					for(i=0;i<np;i++)
					{
						Y->value[i] = Y->value_lo[i] = 0.0;
						for(j=0;j<nprocs;j++)
						{
							#ifdef USE_SSE2
								LIS_QUAD_ADD_SSE2(Y->value[i],Y->value_lo[i],Y->value[i],Y->value_lo[i],ww[j*np+i],wwl[j*np+i]);
							#endif
						}
					}
				}
				lis_free(ww);
			#endif
		#else
			#ifndef USE_FMA2_SSE2
				for(i=0;i<np;i++)
				{
					Y->value[i]    = 0.0;
					Y->value_lo[i] = 0.0;
				}
				for(i=0;i<n;i++)
				{
					for(j=0;j<LU->nnz[i];j++)
					{
						jj  = LU->index[i][j];
						#ifndef USE_SSE2
							LIS_QUAD_FMAD(Y->value[jj],Y->value_lo[jj],Y->value[jj],Y->value_lo[jj],X->value[i],X->value_lo[i],LU->value[i][j]);
						#else
							LIS_QUAD_FMAD_SSE2(Y->value[jj],Y->value_lo[jj],Y->value[jj],Y->value_lo[jj],X->value[i],X->value_lo[i],LU->value[i][j]);
						#endif
					}
				}
			#else
				for(i=0; i<np; i++)
				{
					Y->value[i]  = 0.0;
					Y->value_lo[i] = 0.0;
				}
				for(i=0; i<n; i++)
				{
					for(j=0;j<LU->nnz[i]-1;j+=2)
					{
						j0  = LU->index[i][j];
						j1  = LU->index[i][j+1];
						#ifdef USE_SSE2
							LIS_QUAD_FMAD2_SSE2_STSD(Y->value[j0],Y->value_lo[j0],Y->value[j1],Y->value_lo[j1],Y->value[j0],Y->value_lo[j0],Y->value[j1],Y->value_lo[j1],X->value[i],X->value_lo[i],X->value[i],X->value_lo[i],LU->value[i][j]);
						#endif
					}
					for(;j<LU->nnz[i];j++)
					{
						j0  = LU->index[i][j];
						#ifdef USE_SSE2
							LIS_QUAD_FMAD_SSE2(Y->value[j0],Y->value_lo[j0],Y->value[j0],Y->value_lo[j0],X->value[i],X->value_lo[i],LU->value[i][j]);
						#endif
					}
				}
			#endif
		#endif
	}
	#endif

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
Exemple #26
0
LIS_INT lis_fgmres_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,*z,*v;
	LIS_QUAD *h;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;

	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,n,output;
	double time,ptime;

	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptime   = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = &solver->work[2];
	v       = &solver->work[m+2];

	h       = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_fgmres_quad::h" );
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);

	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;
	rnorm   = 1.0/bnrm2;


	iter=0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_scaleex_nm(bnrm2,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0]    = rnorm;
		s->value_lo[0] = 0.0;

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z[iiv]);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z[iiv], v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k+iih].hi = t.hi[0];
				h[k+iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1+iih].hi = t.hi[0];
			h[i1+iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t.hi[0]   =  h[jj+iih].hi;
				t.lo[0]   =  h[jj+iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj+iih].hi = aa.hi[0];
				h[jj+iih].lo = aa.lo[0];
				h[k+iih].hi = bb.hi[0];
				h[k+iih].lo = bb.lo[0];
			}
			aa.hi[0] = h[ii+iih].hi;
			aa.lo[0] = h[ii+iih].lo;
			bb.hi[0] = h[i1+iih].hi;
			bb.lo[0] = h[i1+iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			if( rr.hi[0]==0.0 )
			{
				rr.hi[0]=1.0e-17;
				rr.lo[0]=0.0;
			}
			lis_quad_div((LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);
			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1] = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii] = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii+iih].hi = aa.hi[0];
			h[ii+iih].lo = aa.lo[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1]);

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+iih]);
		s->value[ii] = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			t.lo[0]  = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj+jj*h_dim]);
			s->value[jj] = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}
		/* x = x + y * z */
		for(j=0;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,z[j],x);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		lis_matvec(A,x,v[0]);
		lis_vector_xpay(b,-1.0,v[0]);
		memset(v[0]->value_lo,0,n*sizeof(LIS_SCALAR));
		lis_vector_nrm2(v[0],&rnorm);
		bnrm2 = 1.0/rnorm;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
Exemple #27
0
LIS_INT lis_gmres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_SCALAR *h;
	LIS_SCALAR aa,bb,rr,a2,b2,t;
	LIS_REAL tnrm2;

	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,n,output;
	double time,ptime;

	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptime   = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = solver->work[2];
	v       = &solver->work[3];

	h       = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR)*(h_dim+1)*(h_dim+2),"lis_gmres::h" );
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);
	
	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;


	iter=0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm);
		lis_vector_scale(1.0/rnorm,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm;

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z,v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t);
				h[k+iih] = t;
				lis_vector_axpy(-t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&tnrm2);
			h[i1+iih] = tnrm2;
			lis_vector_scale(1.0/tnrm2,v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj  =  k-1;
				t   =  h[jj+iih];
				aa  =  h[jj+cs]*t;
				aa +=  h[jj+sn]*h[k+iih];
				bb  = -h[jj+sn]*t;
				bb +=  h[jj+cs]*h[k+iih];
				h[jj+iih] = aa;
				h[k+iih] = bb;
			}
			aa = h[ii+iih];
			bb = h[i1+iih];
			a2 = aa*aa;
			b2 = bb*bb;
			rr = sqrt(a2+b2);
			if( rr==0.0 ) rr=1.0e-17;
			h[ii+cs] = aa/rr;
			h[ii+sn] = bb/rr;
			s->value[i1] = -h[ii+sn]*s->value[ii];
			s->value[ii] =  h[ii+cs]*s->value[ii];

			aa  =  h[ii+cs]*h[ii+iih];
			aa +=  h[ii+sn]*h[i1+iih];
			h[ii+iih] = aa;

			/* convergence check */
			nrm2 = sabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		s->value[ii] = s->value[ii]/h[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t -= h[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t/h[jj+jj*h_dim];
		}
		/* z = z + y * v */
		#ifdef _OPENMP
		#pragma omp parallel for private(k)
		#endif
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}

		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -h[jj-1+sn]*s->value[jj];
			s->value[jj]   =  h[jj-1+cs]*s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t = s->value[j];
			if( j==0 ) t = t-1.0;
			lis_vector_axpy(t,v[j],v[0]);
		}
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
Exemple #28
0
LIS_INT lis_gmres_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_QUAD *h;
	LIS_SCALAR *hd;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;
	LIS_QUAD_PTR rnorm;
	LIS_REAL bnrm2,nrm2,tol,tol2;
	LIS_INT iter,maxiter,n,output;
	LIS_INT iter2,maxiter2;
	double time,ptime;

	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	m        = solver->options[LIS_OPTIONS_RESTART];
	h_dim    = m+1;
	ptime    = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = solver->work[2];
	v       = &solver->work[3];

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);
	LIS_QUAD_SCALAR_MALLOC(rnorm,8,1);

	h       = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_gmres_switch::h" );
	hd      = (LIS_SCALAR *)h;
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	z->precision = LIS_PRECISION_DEFAULT;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;


	iter=0;
	while( iter<maxiter2 )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm.hi[0]);
		lis_vector_scale(1.0/rnorm.hi[0],v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm.hi[0];

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t.hi[0]);
				hd[k+iih] = t.hi[0];
				lis_vector_axpy(-t.hi[0],v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&t.hi[0]);
			hd[i1+iih] = t.hi[0];
			lis_vector_scale(1.0/t.hi[0],v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj        = k-1;
				t.hi[0]   =  hd[jj+iih];
				aa.hi[0]  =  hd[jj+cs]*t.hi[0];
				aa.hi[0] +=  hd[jj+sn]*hd[k+iih];
				bb.hi[0]  = -hd[jj+sn]*t.hi[0];
				bb.hi[0] +=  hd[jj+cs]*hd[k+iih];
				hd[jj+iih] = aa.hi[0];
				hd[k+iih] = bb.hi[0];
			}
			aa.hi[0] = hd[ii+iih];
			bb.hi[0] = hd[i1+iih];
			a2.hi[0] = aa.hi[0]*aa.hi[0];
			b2.hi[0] = bb.hi[0]*bb.hi[0];
			rr.hi[0] = sqrt(a2.hi[0]+b2.hi[0]);
			if( rr.hi[0]==0.0 ) rr.hi[0]=1.0e-17;
			hd[ii+cs] = aa.hi[0]/rr.hi[0];
			hd[ii+sn] = bb.hi[0]/rr.hi[0];
			s->value[i1] = -hd[ii+sn]*s->value[ii];
			s->value[ii] =  hd[ii+cs]*s->value[ii];

			aa.hi[0]  =  hd[ii+cs]*hd[ii+iih];
			aa.hi[0] +=  hd[ii+sn]*hd[i1+iih];
			hd[ii+iih] = aa.hi[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol2 >= nrm2 ) break;
		} while( i<m && iter <maxiter2 );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		s->value[ii] = s->value[ii]/hd[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t.hi[0] -= hd[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t.hi[0]/hd[jj+jj*h_dim];
		}
		/* z = z + y * v */
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}
		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol2 >= nrm2 )
		{
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptime      = ptime;
			break;
		}

		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -hd[jj-1+sn]*s->value[jj];
			s->value[jj]   =  hd[jj-1+cs]*s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			if( j==0 ) t.hi[0] = t.hi[0]-1.0;
			lis_vector_axpy(t.hi[0],v[j],v[0]);
		}
	}

	/* Initial Residual */
	z->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2);
	tol     = solver->tol;


	iter2=iter;
	while( iter2<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2ex_mm(v[0],&rnorm);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)rnorm.hi);
		lis_vector_scaleex_mm(tmp,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0]    = rnorm.hi[0];
		s->value_lo[0] = rnorm.lo[0];

		i = 0;
		do
		{
			iter2++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k+iih].hi = t.hi[0];
				h[k+iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1+iih].hi = t.hi[0];
			h[i1+iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t.hi[0]   =  h[jj+iih].hi;
				t.lo[0]   =  h[jj+iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj+iih].hi = aa.hi[0];
				h[jj+iih].lo = aa.lo[0];
				h[k+iih].hi = bb.hi[0];
				h[k+iih].lo = bb.lo[0];
			}
			aa.hi[0] = h[ii+iih].hi;
			aa.lo[0] = h[ii+iih].lo;
			bb.hi[0] = h[i1+iih].hi;
			bb.lo[0] = h[i1+iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);
			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1] = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii] = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii+iih].hi = aa.hi[0];
			h[ii+iih].lo = aa.lo[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter2 <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+iih]);
		s->value[ii] = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			t.lo[0]  = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj+jj*h_dim]);
			s->value[jj] = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}
		/* z = z + y * v */
		for(k=0;k<n;k++)
		{
			aa.hi[0] = s->value[0];
			aa.lo[0] = s->value_lo[0];
			bb.hi[0] = v[0]->value[k];
			bb.lo[0] = v[0]->value_lo[k];
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)bb.hi);
			z->value[k] = tmp.hi[0];
			z->value_lo[k] = tmp.lo[0];
		}
		for(j=1;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,v[j],z);
		}
		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpyex_mmm(one,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			tmp.hi[0] = s->value[jj];
			tmp.lo[0] = s->value_lo[jj];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+sn]);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+cs]);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[jj-1] = aa.hi[0];
			s->value_lo[jj-1] = aa.lo[0];
			s->value[jj] = bb.hi[0];
			s->value_lo[jj] = bb.lo[0];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			t.lo[0] = s->value_lo[j];
			if( j==0 )
			{
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)one.hi);
			}
			lis_vector_axpyex_mmm(t,v[j],v[0]);
		}
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter       = iter2+1;
	solver->iter2      = iter;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
Exemple #29
0
LIS_INT lis_orthomin_quad(LIS_SOLVER solver)
{
	LIS_Comm comm;  
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, *p, *ap, *aptld;
	LIS_QUAD *dotsave;
	LIS_QUAD_PTR alpha, beta, tmp, one;

	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_INT m,l,lmax,ip,ip0;

	LIS_DEBUG_FUNC_IN;

	comm = LIS_COMM_WORLD;

	A       = solver->A;
	M       = solver->precon;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,3,1);
	LIS_QUAD_SCALAR_MALLOC(one,4,1);

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = &solver->work[2];
	ap      = &solver->work[  (m+1)+2];
	aptld   = &solver->work[2*(m+1)+2];

	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	dotsave = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD) * (m+1),"lis_orthomin_quad::dotsave" );

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,M,r,rtld,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	
	iter=1;
	while( iter<=maxiter )
	{
		ip = (iter-1) % (m+1);

		/* p[ip] = rtld */
		lis_vector_copyex_mm(rtld,p[ip]);

		/* ap[ip]    = A*p[ip] */
		/* aptld[ip] = M^-1 ap[ip] */
		lis_matvec(A,p[ip],ap[ip]);
		time = lis_wtime();
		lis_psolve(solver, ap[ip], aptld[ip]);
		ptime += lis_wtime()-time;

		lmax = _min(m,iter-1);
		for(l=1;l<=lmax;l++)
		{
			ip0 = (ip+m+1-l) % (m+1);
			/* beta = -<Ar[ip],Ap[ip0]> / <Ap[ip0],Ap[ip0]> */
			lis_vector_dotex_mmm(aptld[ip],aptld[ip0],&beta);
			lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,&dotsave[l-1]);
			lis_quad_minus((LIS_QUAD *)beta.hi);

			lis_vector_axpyex_mmm(beta,p[ip0]    ,p[ip]);
			lis_vector_axpyex_mmm(beta,ap[ip0]   ,ap[ip]);
			lis_vector_axpyex_mmm(beta,aptld[ip0],aptld[ip]);
		}
		for(l=m-1;l>0;l--)
		{
			dotsave[l] = dotsave[l-1];
		}

		lis_vector_dotex_mmm(aptld[ip],aptld[ip],&tmp);
		dotsave[0].hi = tmp.hi[0];
		dotsave[0].lo = tmp.lo[0];
		/* test breakdown */
		if( tmp.hi[0]==0.0 && tmp.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		lis_quad_div(&dotsave[0],(LIS_QUAD *)one.hi,&dotsave[0]);

		/* alpha = <rtld,Aptld[ip]> */
		lis_vector_dotex_mmm(rtld,aptld[ip],&alpha);
		lis_quad_mul((LIS_QUAD *)alpha.hi,(LIS_QUAD *)alpha.hi,&dotsave[0]);

		lis_vector_axpyex_mmm( alpha,p[ip],x);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,ap[ip],r);
		lis_vector_axpyex_mmm(alpha,aptld[ip],rtld);
		lis_quad_minus((LIS_QUAD *)alpha.hi);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		iter++;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	lis_free(dotsave);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
void lis_matvect_msr(LIS_MATRIX A, LIS_SCALAR x[], LIS_SCALAR y[])
{
    LIS_INT i,j,js,je,jj;
    LIS_INT n,np;
    LIS_SCALAR t;
#ifdef _OPENMP
    LIS_INT k,is,ie,nprocs;
    LIS_SCALAR *w;
#endif

    n    = A->n;
    np   = A->np;

    if( A->is_splited )
    {
        for(i=0; i<n; i++)
        {
            y[i] = A->D->value[i] * x[i];
        }
        for(i=0; i<n; i++)
        {
            t = x[i];
            js = A->L->index[i];
            je = A->L->index[i+1];
            for(j=js; j<je; j++)
            {
                jj = A->L->index[j];
                y[jj] += A->L->value[j] * t;
            }
            js = A->U->index[i];
            je = A->U->index[i+1];
            for(j=js; j<je; j++)
            {
                jj = A->U->index[j];
                y[jj] += A->U->value[j] * t;
            }
        }
    }
    else
    {
#ifdef _OPENMP
        nprocs = omp_get_max_threads();
        w = (LIS_SCALAR *)lis_malloc( nprocs*np*sizeof(LIS_SCALAR),"lis_matvect_msr::w" );
        #pragma omp parallel private(i,j,js,je,t,jj,k)
        {
            k = omp_get_thread_num();
            #pragma omp for
            for(j=0; j<nprocs; j++)
            {
                memset( &w[j*np], 0, np*sizeof(LIS_SCALAR) );
            }
            #pragma omp for
            for(i=0; i<n; i++)
            {
                js = A->index[i];
                je = A->index[i+1];
                t = x[i];
                for(j=js; j<je; j++)
                {
                    jj  = k*np+A->index[j];
                    w[jj] += A->value[j] * t;
                }
                w[k*np+i] += A->value[i] * x[i];
            }
            #pragma omp for
            for(i=0; i<np; i++)
            {
                t = 0.0;
                for(j=0; j<nprocs; j++)
                {
                    t += w[j*np+i];
                }
                y[i] = t;
            }
        }
        lis_free(w);
#else
        for(i=0; i<n; i++)
        {
            y[i] = A->value[i] * x[i];
        }
        for(i=0; i<n; i++)
        {
            t = x[i];
            js = A->index[i];
            je = A->index[i+1];
            for(j=js; j<je; j++)
            {
                jj = A->index[j];
                y[jj] += A->value[j] * t;
            }
        }
#endif
    }
}