LIS_INT lis_vector_check(LIS_VECTOR v, LIS_INT level) { LIS_DEBUG_FUNC_IN; switch( level ) { case LIS_VECTOR_CHECK_NULL: if( !lis_is_malloc(v) ) { LIS_SETERR(LIS_ERR_ILL_ARG,"vector v is undefined\n"); return LIS_ERR_ILL_ARG; } break; default: if( !lis_is_malloc(v) ) { LIS_SETERR(LIS_ERR_ILL_ARG,"vector v is undefined\n"); return LIS_ERR_ILL_ARG; } if( v->status<=LIS_VECTOR_ASSEMBLING ) { LIS_SETERR(LIS_ERR_ILL_ARG,"vector v is assembling\n"); return LIS_ERR_ILL_ARG; } break; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_input_mm_banner(FILE *file, LIS_INT *mmtype) { char buf[BUFSIZE]; char banner[64], mtx[64], fmt[64], dtype[64], dstruct[64]; char *p; LIS_DEBUG_FUNC_IN; /* check banner */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } sscanf(buf, "%s %s %s %s %s", banner, mtx, fmt, dtype, dstruct); for(p=mtx;*p!='\0';p++) *p = (char)tolower(*p); for(p=fmt;*p!='\0';p++) *p = (char)tolower(*p); for(p=dtype;*p!='\0';p++) *p = (char)tolower(*p); for(p=dstruct;*p!='\0';p++) *p = (char)tolower(*p); if( strncmp(banner, MM_BANNER, strlen(MM_BANNER))!=0 || strncmp(mtx, MM_MTX, strlen(MM_MTX))!=0 ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not Matrix Market banner\n"); return LIS_ERR_FILE_IO; } if( strncmp(fmt, MM_FMT, strlen(MM_FMT))!=0 ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not Coodinate format\n"); return LIS_ERR_FILE_IO; } if( strncmp(dtype, MM_TYPE_REAL, strlen(MM_TYPE_REAL))!=0 ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not real\n"); return LIS_ERR_FILE_IO; } if( strncmp(dstruct, MM_TYPE_GENERAL, strlen(MM_TYPE_GENERAL))==0 ) { *mmtype = MM_GENERAL; } else if( strncmp(dstruct, MM_TYPE_SYMM, strlen(MM_TYPE_SYMM))==0) { *mmtype = MM_SYMM; } else { LIS_SETERR(LIS_ERR_FILE_IO,"Not general or symmetric\n"); return LIS_ERR_FILE_IO; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_output_vector(LIS_VECTOR v, LIS_INT format, char *filename) { LIS_INT err; LIS_DEBUG_FUNC_IN; #ifdef USE_MPI MPI_Barrier(v->comm); #endif err = lis_vector_check(v,LIS_VECTOR_CHECK_NULL); if( err ) return err; switch( format ) { case LIS_FMT_PLAIN: err = lis_output_vector_plain(v,filename); break; case LIS_FMT_MM: err = lis_output_vector_mm(v,filename); break; case LIS_FMT_LIS: err = lis_output_vector_lis_ascii(v,filename); break; default: LIS_SETERR(LIS_ERR_ILL_ARG,"ill format option\n"); return LIS_ERR_ILL_ARG; break; } LIS_DEBUG_FUNC_OUT; return err; }
LIS_INT lis_precon_register(char *name, LIS_PRECON_CREATE_XXX pcreate, LIS_PSOLVE_XXX psolve, LIS_PSOLVET_XXX psolvet) { LIS_DEBUG_FUNC_IN; if( precon_register_top==NULL ) { precon_register_top = (LIS_PRECON_REGISTER *)lis_malloc(LIS_PRECON_REGISTER_MAX*sizeof(struct LIS_PRECON_REGISTER_STRUCT),"lis_precon_register::precon_register_top"); } if( precon_register_type-LIS_PRECON_TYPE_USERDEF==LIS_PRECON_REGISTER_MAX ) { LIS_SETERR(LIS_FAILS,"lis_precon_resister is max\n"); return LIS_FAILS; } precon_register_top[precon_register_type-LIS_PRECON_TYPE_USERDEF].pcreate = pcreate; precon_register_top[precon_register_type-LIS_PRECON_TYPE_USERDEF].psolve = psolve; precon_register_top[precon_register_type-LIS_PRECON_TYPE_USERDEF].psolvet = psolvet; precon_register_top[precon_register_type-LIS_PRECON_TYPE_USERDEF].precon_type = precon_register_type; strncpy(precon_register_top[precon_register_type-LIS_PRECON_TYPE_USERDEF].name,name,LIS_PRECONNAME_MAX); precon_register_top[precon_register_type-LIS_PRECON_TYPE_USERDEF].name[LIS_PRECONNAME_MAX] = '\0'; precon_register_type++; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_input_vector(LIS_VECTOR v, char *filename) { LIS_INT fileformat; char buf[256],banner[128]; LIS_INT err; FILE *file; LIS_Comm comm; comm = v->comm; if( filename==NULL ) { LIS_SETERR(LIS_ERR_ILL_ARG,"filname is NULL\n"); return LIS_ERR_ILL_ARG; } file = fopen(filename, "r"); if( file==NULL ) { LIS_SETERR1(LIS_ERR_FILE_IO,"cannot open file %s\n",filename); return LIS_ERR_FILE_IO; } if( fgets(buf, 256, file) == NULL ) { fclose(file); return LIS_ERR_FILE_IO; } sscanf(buf, "%s", banner); if( strncmp(banner, MM_BANNER, strlen(MM_BANNER)) == 0) { fileformat = LIS_FMT_MM; } else if( strncmp(banner, LISBanner, strlen(LISBanner)) == 0) { fileformat = LIS_FMT_LIS; } else { fileformat = LIS_FMT_PLAIN; } rewind(file); switch( fileformat ) { case LIS_FMT_MM: err = lis_input_vector_mm(v,file); break; case LIS_FMT_LIS: err = lis_input_vector_lis(v,filename,file); break; case LIS_FMT_PLAIN: err = lis_input_vector_plain(v,file); break; } fclose(file); #ifdef USE_MPI MPI_Barrier(comm); #endif return err; }
LIS_INT lis_vector_duplicate(void *vin, LIS_VECTOR *vout) { LIS_INT precision,err; LIS_DEBUG_FUNC_IN; precision = LIS_PRECISION_DEFAULT; if( ((LIS_VECTOR)vin)->label==LIS_LABEL_VECTOR) { precision = ((LIS_VECTOR)vin)->precision; } else if( ((LIS_VECTOR)vin)->label!=LIS_LABEL_MATRIX) { LIS_SETERR(LIS_ERR_ILL_ARG, "First argument is not LIS_VECTOR or LIS_MATRIX\n"); return LIS_ERR_ILL_ARG; } err = lis_vector_duplicateex(precision,vin,vout); LIS_DEBUG_FUNC_OUT; return err; }
LIS_INT lis_input_mm_size(FILE *file, LIS_INT *nr, LIS_INT *nc, LIS_INT *nnz, LIS_INT *isb, LIS_INT *isx, LIS_INT *isbin) { char buf[BUFSIZE]; LIS_INT err; LIS_DEBUG_FUNC_IN; /* check size */ do { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } }while( buf[0]=='%' ); #ifdef _LONGLONG err = sscanf(buf, "%lld %lld %lld %lld %lld %lld", nr, nc, nnz, isb, isx, isbin); #else err = sscanf(buf, "%d %d %d %d %d %d", nr, nc, nnz, isb, isx, isbin); #endif if( err==3 ) { *isb = 0; *isx = 0; *isbin = 0; } else if( err==5 ) { *isbin = 0; } if( *nr!=*nc ) { LIS_SETERR(LIS_ERR_FILE_IO,"matrix is not square\n"); return LIS_ERR_FILE_IO; } LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_input(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, char *filename) { LIS_INT err; LIS_INT fileformat; char buf[256],banner[128]; FILE *file; LIS_DEBUG_FUNC_IN; err = lis_matrix_check(A,LIS_MATRIX_CHECK_NULL); if( err ) return err; if( b!=NULL && x!=NULL ) { err = lis_vector_check(b,LIS_VECTOR_CHECK_NULL); if( err ) return err; err = lis_vector_check(x,LIS_VECTOR_CHECK_NULL); if( err ) return err; } if( filename==NULL ) { LIS_SETERR(LIS_ERR_ILL_ARG,"filname is NULL\n"); return LIS_ERR_ILL_ARG; } file = fopen(filename, "r"); if( file==NULL ) { LIS_SETERR1(LIS_ERR_FILE_IO,"cannot open file %s\n",filename); return LIS_ERR_FILE_IO; } /* file format check */ if( fgets(buf, 256, file) == NULL ) { fclose(file); return LIS_ERR_FILE_IO; } sscanf(buf, "%s", banner); if( strncmp(banner, MM_BANNER, strlen(MM_BANNER)) == 0) { fileformat = LIS_FMT_MM; } /* else if( strncmp(banner, LISBanner, strlen(LISBanner)) == 0) { fileformat = LIS_FMT_LIS; } else if( strncmp(banner, ITBLBanner, strlen(ITBLBanner)) == 0) { fileformat = LIS_FMT_ITBL; } */ else { fileformat = LIS_FMT_HB; } rewind(file); /* if( fileformat==LIS_FMT_FREE ) { fclose(file); err = lis_input_option(&option, filename); if( err ) return err; file = fopen(option.filename, "r"); if( file==NULL ) { LIS_SETERR1(LIS_ERR_FILE_IO,"cannot open file %s\n",filename); return LIS_ERR_FILE_IO; } } */ switch( fileformat ) { case LIS_FMT_MM: err = lis_input_mm(A,b,x,file); break; case LIS_FMT_HB: err = lis_input_hb(A,b,x,file); break; /* case LIS_FMT_ITBL: err = lis_input_mmm(A,b,x,file,comm,matrix_type,bnr,bnc,row,col); break; case LIS_FMT_LIS: err = lis_input_lis(A,b,x,filename,file,comm,matrix_type,bnr,bnc,row,col); break; case LIS_FMT_FREE: err = lis_input_free(A,b,x,option,file,comm,matrix_type,bnr,bnc,row,col); break; */ default: fclose(file); return err; } fclose(file); #ifdef USE_MPI MPI_Barrier(A->comm); #endif LIS_DEBUG_FUNC_OUT; return err; }
LIS_INT lis_input_vector_lis(LIS_VECTOR v, char *filename, FILE *file) { LIS_INT err; char buf[BUFSIZE],banner[128],mode[128],mattype[128]; LIS_INT in_mode; if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } buf[10] = '\0'; sscanf(buf, "%s %s %s", banner, mode, mattype); if( strncmp(banner, LISBanner, strlen(LISBanner)) != 0) { LIS_SETERR(LIS_ERR_FILE_IO,"not lis file format\n"); return LIS_ERR_FILE_IO; } in_mode = LIS_FMT_LIS_ASCII; if( mode[0]=='B' || mode[0]=='L' ) { fclose(file); file = fopen(filename, "rb"); if( file==NULL ) { LIS_SETERR1(LIS_ERR_FILE_IO,"cannot open file %s\n", filename); return LIS_ERR_FILE_IO; } err = fread(buf, sizeof(char), 10, file); if( err ) { return err; } in_mode = 1; in_mode = *(char *)&in_mode; if( (in_mode==LIS_BINARY_BIG && mode[0]=='L') || (in_mode==LIS_BINARY_LITTLE && mode[0]=='B') ) { in_mode = LIS_TRUE; } else { in_mode = LIS_FALSE; } } if( strncmp(mattype, "vec", 3) == 0 ) { if( in_mode==LIS_FMT_LIS_ASCII ) { lis_input_vector_lis_ascii(v,file); } else { LIS_SETERR_IMP; return LIS_ERR_NOT_IMPLEMENTED; } } else { LIS_SETERR(LIS_ERR_FILE_IO,"not lis file format\n"); return LIS_ERR_FILE_IO; } return LIS_SUCCESS; }
LIS_INT lis_input_vector_mm(LIS_VECTOR v, FILE *file) { char buf[BUFSIZE]; char banner[64], mtx[64], fmt[64], dtype[64], dstruct[64]; char *p; LIS_INT i; LIS_INT err; LIS_INT n,is,ie; LIS_INT idx; LIS_SCALAR val; /* check banner */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } sscanf(buf, "%s %s %s %s %s", banner, mtx, fmt, dtype, dstruct); for(p=mtx;*p!='\0';p++) *p = (char)tolower(*p); for(p=fmt;*p!='\0';p++) *p = (char)tolower(*p); for(p=dtype;*p!='\0';p++) *p = (char)tolower(*p); for(p=dstruct;*p!='\0';p++) *p = (char)tolower(*p); if( strncmp(banner, MM_BANNER, strlen(MM_BANNER))!=0 || strncmp(mtx, MM_VEC, strlen(MM_VEC))!=0 ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not Matrix Market banner\n"); return LIS_ERR_FILE_IO; } if( strncmp(fmt, MM_FMT, strlen(MM_FMT))!=0 ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not Coodinate format\n"); return LIS_ERR_FILE_IO; } if( strncmp(dtype, MM_TYPE_REAL, strlen(MM_TYPE_REAL))!=0 ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not real\n"); return LIS_ERR_FILE_IO; } if( strncmp(dstruct, MM_TYPE_GENERAL, strlen(MM_TYPE_GENERAL))!=0 ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not general\n"); return LIS_ERR_FILE_IO; } /* check size */ do { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } }while( buf[0]=='%' ); #ifdef _LONGLONG if( sscanf(buf, "%lld", &n) != 1 ) #else if( sscanf(buf, "%d", &n) != 1 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } /* read data */ err = lis_vector_set_size(v,0,n); if( err ) { return err; } lis_vector_get_range(v,&is,&ie); for(i=0;i<n;i++) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG #ifdef _LONG__DOUBLE if( sscanf(buf, "%lld %Lg", &idx, &val) != 2 ) #else if( sscanf(buf, "%lld %lg", &idx, &val) != 2 ) #endif #else #ifdef _LONG__DOUBLE if( sscanf(buf, "%d %Lg", &idx, &val) != 2 ) #else if( sscanf(buf, "%d %lg", &idx, &val) != 2 ) #endif #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } idx--; if( idx>=is && idx<ie ) { v->value[idx-is] = val; } } return LIS_SUCCESS; }
LIS_INT lis_vector_duplicateex(LIS_INT precision, void *A, LIS_VECTOR *vout) { LIS_INT np,pad; LIS_INT nprocs; LIS_INT i; #ifdef USE_MPI LIS_INT *ranges; #endif LIS_SCALAR *value; LIS_DEBUG_FUNC_IN; if( ((LIS_VECTOR)A)->label!=LIS_LABEL_VECTOR && ((LIS_VECTOR)A)->label!=LIS_LABEL_MATRIX) { LIS_SETERR(LIS_ERR_ILL_ARG, "Second argument is not LIS_VECTOR or LIS_MATRIX\n"); return LIS_ERR_ILL_ARG; } nprocs = ((LIS_VECTOR)A)->nprocs; np = ((LIS_VECTOR)A)->np; pad = ((LIS_VECTOR)A)->pad; *vout = NULL; *vout = (LIS_VECTOR)lis_malloc( sizeof(struct LIS_VECTOR_STRUCT),"lis_vector_duplicateex::vout" ); if( NULL==*vout ) { LIS_SETERR_MEM(sizeof(struct LIS_VECTOR_STRUCT)); return LIS_OUT_OF_MEMORY; } lis_vector_init(vout); if( !precision ) { value = (LIS_SCALAR *)lis_malloc( (np+pad)*sizeof(LIS_SCALAR),"lis_vector_duplicateex::value" ); if( NULL==value ) { LIS_SETERR_MEM((np+pad)*sizeof(LIS_SCALAR)); lis_vector_destroy(*vout); *vout = NULL; return LIS_OUT_OF_MEMORY; } (*vout)->value = value; #ifdef _OPENMP #pragma omp parallel for private(i) #endif for(i=0;i<np+pad;i++) { (*vout)->value[i] = 0.0; } } else { value = (LIS_SCALAR *)lis_malloc( (2*(np+pad) + (np+pad)%2)*sizeof(LIS_SCALAR),"lis_vector_duplicateex::value" ); if( NULL==value ) { LIS_SETERR_MEM((2*(np+pad) + (np+pad)%2)*sizeof(LIS_SCALAR)); lis_vector_destroy(*vout); *vout = NULL; return LIS_OUT_OF_MEMORY; } (*vout)->value = value; (*vout)->value_lo = value + np+pad + (np+pad)%2; (*vout)->work = (LIS_SCALAR *)lis_malloc( 32*sizeof(LIS_SCALAR),"lis_vector_duplicateex::vout->work" ); if( NULL==(*vout)->work ) { LIS_SETERR_MEM(32*sizeof(LIS_SCALAR)); lis_vector_destroy(*vout); *vout = NULL; return LIS_OUT_OF_MEMORY; } #ifdef USE_VEC_COMP #pragma cdir nodep #endif #ifdef _OPENMP #pragma omp parallel for private(i) #endif for(i=0;i<np+pad;i++) { (*vout)->value[i] = 0.0; (*vout)->value_lo[i] = 0.0; } } #ifdef USE_MPI ranges = (LIS_INT *)lis_malloc( (nprocs+1)*sizeof(LIS_INT),"lis_vector_duplicateex::ranges" ); if( ranges==NULL ) { LIS_SETERR_MEM((nprocs+1)*sizeof(LIS_INT)); lis_vector_destroy(*vout); *vout = NULL; return LIS_OUT_OF_MEMORY; } for(i=0;i<nprocs+1;i++) ranges[i] = ((LIS_VECTOR)A)->ranges[i]; (*vout)->ranges = ranges; #else (*vout)->ranges = NULL; #endif (*vout)->is_copy = LIS_TRUE; (*vout)->status = LIS_VECTOR_ASSEMBLED; (*vout)->precision = precision; (*vout)->n = ((LIS_VECTOR)A)->n; (*vout)->gn = ((LIS_VECTOR)A)->gn; (*vout)->np = ((LIS_VECTOR)A)->np; (*vout)->pad = ((LIS_VECTOR)A)->pad; (*vout)->comm = ((LIS_VECTOR)A)->comm; (*vout)->my_rank = ((LIS_VECTOR)A)->my_rank; (*vout)->nprocs = ((LIS_VECTOR)A)->nprocs; (*vout)->is = ((LIS_VECTOR)A)->is; (*vout)->ie = ((LIS_VECTOR)A)->ie; (*vout)->origin = ((LIS_VECTOR)A)->origin; (*vout)->is_destroy = ((LIS_VECTOR)A)->is_destroy; LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_esolver_output_rhistory(LIS_ESOLVER esolver, char *filename) { LIS_INT i,maxiter; #ifdef USE_MPI LIS_INT my_rank,err; #endif FILE *file; LIS_DEBUG_FUNC_IN; maxiter = esolver->iter+1; if( esolver->retcode!=LIS_SUCCESS ) { maxiter--; } #ifdef USE_MPI if( esolver->residual==NULL ) { LIS_SETERR(LIS_FAILS,"eigensolver's residual history is empty\n"); return LIS_FAILS; } if( esolver->A==NULL ) { LIS_SETERR(LIS_FAILS,"matrix A is NULL\n"); return LIS_FAILS; } MPI_Barrier(esolver->A->comm); my_rank = esolver->A->my_rank; err = 0; if( my_rank==0 ) { file = fopen(filename, "w"); if( file==NULL ) { LIS_SETERR1(LIS_ERR_FILE_IO,"cannot open file %s\n", filename); err = 1; } else { for(i=0;i<maxiter;i++) { #ifdef _LONG__DOUBLE fprintf(file, "%Le\n", esolver->residual[i]); #else fprintf(file, "%e\n", esolver->residual[i]); #endif } fclose(file); } } MPI_Barrier(esolver->A->comm); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #else if( esolver->residual==NULL ) { LIS_SETERR(LIS_FAILS,"eigensolver's residual history is empty\n"); return LIS_FAILS; } file = fopen(filename, "w"); if( file==NULL ) { LIS_SETERR1(LIS_ERR_FILE_IO,"cannot open file %s\n", filename); return LIS_ERR_FILE_IO; } for(i=0;i<maxiter;i++) { #ifdef _LONG__DOUBLE fprintf(file, "%Le\n", esolver->residual[i]); #else fprintf(file, "%e\n", esolver->residual[i]); #endif } fclose(file); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; #endif }
LIS_INT lis_input_hb_csr(LIS_MATRIX A, LIS_VECTOR b, LIS_VECTOR x, FILE *file) { char buf[BUFSIZE]; char title[128], key[128], mtx[64], dat[128]; char *p; char MXTYPE_F,MXTYPE_S,MXTYPE_T; char RHSTYP_F,RHSTYP_S,RHSTYP_T; LIS_INT TOTCRD,PTRCRD,INDCRD,VALCRD,RHSCRD; LIS_INT NROW,NCOL,NNZERO,NELTVL; LIS_INT NRHS,NRHSIX; LIS_INT iptr,iind,ival,irhs; LIS_INT wptr,wind,wval,wrhs; LIS_INT i,k,j,my_rank; LIS_INT err; LIS_INT n,is,ie; LIS_INT *ptr, *index; LIS_INT matrix_type; LIS_SCALAR *value; LIS_MATRIX B; #ifdef USE_MPI MPI_Comm_rank(A->comm,&my_rank); #else my_rank = 0; #endif matrix_type = A->matrix_type; /* Line 1 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } strncpy(title, buf ,72); title[72] = '\0'; strncpy(key ,&buf[72], 8); key[8] = '\0'; printf("title: %s\n",title); printf("key : %s\n",key); /* Line 2 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG if( sscanf(buf, "%14lld%14lld%14lld%14lld%14lld", &TOTCRD, &PTRCRD, &INDCRD, &VALCRD, &RHSCRD) != 5 ) #else if( sscanf(buf, "%14d%14d%14d%14d%14d", &TOTCRD, &PTRCRD, &INDCRD, &VALCRD, &RHSCRD) != 5 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG printf("%14lld%14lld%14lld%14lld%14lld\n",TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD); #else printf("%14d%14d%14d%14d%14d\n",TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD); #endif /* Line 3 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG if( sscanf(buf, "%s %lld %lld %lld %lld", mtx, &NROW, &NCOL, &NNZERO, &NELTVL) != 5 ) #else if( sscanf(buf, "%s %d %d %d %d", mtx, &NROW, &NCOL, &NNZERO, &NELTVL) != 5 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } for(p=mtx;*p!='\0';p++) *p = (char)tolower(*p); MXTYPE_F = mtx[0]; MXTYPE_S = mtx[1]; MXTYPE_T = mtx[2]; if( mtx[0]!='r' ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not real\n"); return LIS_ERR_FILE_IO; } /* if( mtx[1]!='u' ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not unsymmetric\n"); return LIS_ERR_FILE_IO; } */ if( mtx[2]!='a' ) { LIS_SETERR(LIS_ERR_FILE_IO,"Not assembled\n"); return LIS_ERR_FILE_IO; } if( NROW!=NCOL ) { LIS_SETERR(LIS_ERR_FILE_IO,"matrix is not square\n"); return LIS_ERR_FILE_IO; } #ifdef _LONGLONG printf("%c%c%c %lld %lld %lld %lld\n",MXTYPE_F, MXTYPE_S, MXTYPE_T, NROW, NCOL, NNZERO, NELTVL); #else printf("%c%c%c %d %d %d %d\n",MXTYPE_F, MXTYPE_S, MXTYPE_T, NROW, NCOL, NNZERO, NELTVL); #endif /* Line 4 */ if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } lis_input_hb_get_fmt( buf ,16,&iptr,&wptr); lis_input_hb_get_fmt(&buf[16],16,&iind,&wind); lis_input_hb_get_fmt(&buf[32],20,&ival,&wval); lis_input_hb_get_fmt(&buf[52],20,&irhs,&wrhs); #ifdef _LONGLONG printf("%lld %lld %lld %lld\n",iptr,iind,ival,irhs); printf("%lld %lld %lld %lld\n",wptr,wind,wval,wrhs); #else printf("%d %d %d %d\n",iptr,iind,ival,irhs); printf("%d %d %d %d\n",wptr,wind,wval,wrhs); #endif /* Line 5 */ if( RHSCRD!=0 ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } #ifdef _LONGLONG sscanf(buf, "%s %lld %lld", mtx, &NRHS, &NRHSIX); #else sscanf(buf, "%s %d %d", mtx, &NRHS, &NRHSIX); #endif /* #ifdef _LONGLONG if( sscanf(buf, "%s %lld %lld", mtx, &NRHS, &NRHSIX) != 3 ) #else if( sscanf(buf, "%s %d %d", mtx, &NRHS, &NRHSIX) != 3 ) #endif { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } */ for(p=mtx;*p!='\0';p++) *p = (char)tolower(*p); RHSTYP_F = mtx[0]; RHSTYP_S = mtx[1]; RHSTYP_T = mtx[2]; #ifdef _LONGLONG printf("%c%c%c %lld %lld\n",RHSTYP_F, RHSTYP_S, RHSTYP_T, NRHS, NRHSIX); #else printf("%c%c%c %d %d\n",RHSTYP_F, RHSTYP_S, RHSTYP_T, NRHS, NRHSIX); #endif } err = lis_matrix_set_size(A,0,NROW); if( err ) { return err; } n = A->n; lis_matrix_get_range(A,&is,&ie); err = lis_matrix_malloc_csr(n,NNZERO,&ptr,&index,&value); if( err ) { return err; } /* read data */ k = 0; for( i=0; i<PTRCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<iptr&&k<n+1;j++) { strncpy(dat, p, wptr); dat[wptr] = '\0'; ptr[k] = atoi(dat) - 1; p += wptr; k++; } } k = 0; for( i=0; i<INDCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<iind&&k<NNZERO;j++) { strncpy(dat, p, wind); dat[wind] = '\0'; index[k] = atoi(dat) - 1; p += wind; k++; } } k = 0; for( i=0; i<VALCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<ival&&k<NNZERO;j++) { strncpy(dat, p, wval); dat[wval] = '\0'; value[k] = atof(dat); p += wval; k++; } } if( RHSCRD>0 ) { /* k = 0; for( i=0; i<RHSCRD; i++ ) { if( fgets(buf, BUFSIZE, file) == NULL ) { LIS_SETERR_FIO; return LIS_ERR_FILE_IO; } p = buf; for(j=0;j<ival&&k<NNZERO;j++) { strncpy(dat, p, wval); dat[wval] = '\0'; value[k] = atof(dat); p += wval; printf("%e ",value[k]); k++; } printf("\n"); } */ } err = lis_matrix_set_csc(NNZERO,ptr,index,value,A); if( err ) { return err; } err = lis_matrix_assemble(A); if( err ) return err; if( matrix_type!=LIS_MATRIX_CSC ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_type(B,LIS_MATRIX_CSR); err = lis_matrix_convert_csc2csr(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } return LIS_SUCCESS; }
LIS_INT lis_esolve(LIS_MATRIX A, LIS_VECTOR x, LIS_SCALAR *evalue0, LIS_ESOLVER esolver) { LIS_INT nesolver,niesolver,emaxiter; LIS_SCALAR *evalue; LIS_VECTOR *evector; LIS_SCALAR *resid; LIS_SCALAR *rhistory; LIS_INT *iter,*iter2; LIS_INT err; LIS_INT output; LIS_INT ss, mode; double time; double gshift; LIS_INT estorage,eblock; LIS_MATRIX B; LIS_INT eprecision; LIS_VECTOR xx; LIS_DEBUG_FUNC_IN; /* begin parameter check */ err = lis_matrix_check(A,LIS_MATRIX_CHECK_ALL); if( err ) return err; if( x==NULL ) { LIS_SETERR(LIS_ERR_ILL_ARG,"vector x is undefined\n"); return LIS_ERR_ILL_ARG; } if( A->n!=x->n ) { return LIS_ERR_ILL_ARG; } if( A->gn<=0 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Size n(=%d) of matrix A is less than 0\n",A->gn); return LIS_ERR_ILL_ARG; } nesolver = esolver->options[LIS_EOPTIONS_ESOLVER]; niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER]; ss = esolver->options[LIS_EOPTIONS_SUBSPACE]; mode = esolver->options[LIS_EOPTIONS_MODE]; emaxiter = esolver->options[LIS_EOPTIONS_MAXITER]; gshift = esolver->params[LIS_EPARAMS_SHIFT - LIS_EOPTIONS_LEN]; output = esolver->options[LIS_EOPTIONS_OUTPUT]; estorage = esolver->options[LIS_EOPTIONS_STORAGE]; eblock = esolver->options[LIS_EOPTIONS_STORAGE_BLOCK]; eprecision = esolver->options[LIS_EOPTIONS_PRECISION]; esolver->eprecision = eprecision; if( nesolver < 1 || nesolver > LIS_ESOLVER_LEN ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_ESOLVER is %d (Set between 1 to %d)\n",nesolver, LIS_ESOLVER_LEN); return LIS_ERR_ILL_ARG; } if( niesolver < 1 || niesolver > 7 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 7)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && niesolver > 4 ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 1 to 4 for Subspace)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_LI && niesolver == LIS_ESOLVER_PI ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 7 for Lanczos)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_AI && (( niesolver == LIS_ESOLVER_PI ) || ( niesolver == LIS_ESOLVER_CG) || ( niesolver == LIS_ESOLVER_JD)) ) { LIS_SETERR1(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_INNER_ESOLVER is %d (Set between 2 to 4 or 6 for Arnoldi)\n", niesolver); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && ss > A->gn ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Subspace)\n", ss, A->gn); return LIS_ERR_ILL_ARG; } if (( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_LI || esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_AI ) && ss > A->gn ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_SUBSPACE is %d (Set less than or equal to matrix size %d for Lanczos and Arnoldi)\n", ss, A->gn); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == LIS_ESOLVER_SI && mode >= ss ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Subspace)\n", mode, ss); return LIS_ERR_ILL_ARG; } if ( esolver->options[LIS_EOPTIONS_ESOLVER] == ( LIS_ESOLVER_LI || LIS_ESOLVER_AI ) && mode >= ss ) { LIS_SETERR2(LIS_ERR_ILL_ARG,"Parameter LIS_EOPTIONS_MODE is %d (Set less than subspace size %d for Lanczos or Arnoldi)\n", mode, ss); return LIS_ERR_ILL_ARG; } #ifdef USE_QUAD_PRECISION if( eprecision==LIS_PRECISION_QUAD && lis_esolver_execute_quad[nesolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Quad precision eigensolver %s is not implemented\n",lis_esolvername[nesolver]); return LIS_ERR_NOT_IMPLEMENTED; } else if( eprecision==LIS_PRECISION_SWITCH && lis_esolver_execute_switch[nesolver]==NULL ) { LIS_SETERR1(LIS_ERR_NOT_IMPLEMENTED,"Switch esolver %s is not implemented\n",lis_esolvername[nesolver]); return LIS_ERR_NOT_IMPLEMENTED; } if( esolver->options[LIS_EOPTIONS_SWITCH_MAXITER]==-1 ) { esolver->options[LIS_EOPTIONS_SWITCH_MAXITER] = emaxiter; } #endif /* create eigenvalue array */ if( esolver->evalue ) lis_free(esolver->evalue); evalue = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::evalue"); if( evalue==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } evalue[0] = 1.0; evalue[ss-1] = 1.0; /* create residual norm array */ if( esolver->resid ) lis_free(esolver->resid); resid = (LIS_SCALAR *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::resid"); if( resid==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create number of iterations array */ if( esolver->iter ) lis_free(esolver->iter); iter = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::iter"); if( iter==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create quad precision number of iterations array */ if( esolver->iter2 ) lis_free(esolver->iter2); iter2 = (LIS_INT *)lis_malloc((ss+2)*sizeof(LIS_SCALAR),"lis_esolve::iter2"); if( iter2==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_SCALAR)); esolver->retcode = err; return err; } /* create initial vector */ #ifndef USE_QUAD_PRECISION err = lis_vector_duplicate(A,&xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { err = lis_vector_duplicate(A,&xx); } else { err = lis_vector_duplicateex(LIS_PRECISION_QUAD,A,&xx); } #endif if( err ) { esolver->retcode = err; return err; } if( esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) { if( output ) lis_printf(A->comm,"initial vector x : 1\n"); #ifndef USE_QUAD_PRECISION lis_vector_set_all(1.0,xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { lis_vector_set_all(1.0,xx); } else { lis_vector_set_allex_nm(1.0,xx); } #endif } else { if( output ) lis_printf(A->comm,"initial vector x : user defined\n"); #ifndef USE_QUAD_PRECISION lis_vector_copy(x,xx); #else if( eprecision==LIS_PRECISION_DOUBLE ) { lis_vector_copy(x,xx); } else { lis_vector_copyex_nm(x,xx); } #endif } /* global shift */ if ( output ) if( A->my_rank==0 ) printf("shift : %e\n", gshift); /* create eigenvector array */ if( esolver->evector ) lis_free(esolver->evector); evector = (LIS_VECTOR *)lis_malloc((ss+2)*sizeof(LIS_VECTOR),"lis_esolve::evector"); if( evector==NULL ) { LIS_SETERR_MEM((ss+2)*sizeof(LIS_VECTOR)); esolver->retcode = err; return err; } /* create residual history array */ if( esolver->rhistory ) lis_free(esolver->rhistory); rhistory = (LIS_SCALAR *)lis_malloc((emaxiter+2)*sizeof(LIS_SCALAR),"lis_esolve::rhistory"); if( rhistory==NULL ) { LIS_SETERR_MEM((emaxiter+2)*sizeof(LIS_SCALAR)); lis_vector_destroy(xx); esolver->retcode = err; return err; } /* convert matrix */ if( estorage>0 && A->matrix_type!=estorage ) { err = lis_matrix_duplicate(A,&B); if( err ) return err; lis_matrix_set_blocksize(B,eblock,eblock,NULL,NULL); lis_matrix_set_type(B,estorage); err = lis_matrix_convert(A,B); if( err ) return err; lis_matrix_storage_destroy(A); lis_matrix_DLU_destroy(A); lis_matrix_diag_destroy(A->WD); if( A->l2g_map ) lis_free( A->l2g_map ); if( A->commtable ) lis_commtable_destroy( A->commtable ); if( A->ranges ) lis_free( A->ranges ); err = lis_matrix_copy_struct(B,A); if( err ) return err; lis_free(B); } esolver->A = A; esolver->evalue = evalue; esolver->x = x; esolver->evector = evector; rhistory[0] = 1.0; esolver->rhistory = rhistory; esolver->resid = resid; esolver->iter = iter; esolver->iter2 = iter2; if( A->my_rank==0 ) { #ifdef _LONG__DOUBLE if ( output ) printf("precision : long double\n"); #else if ( output ) printf("precision : %s\n", lis_eprecisionname[eprecision]); #endif #ifdef _LONG__LONG if ( output ) printf("eigensolver : %s\n", lis_esolvername[nesolver]); #else if ( output ) printf("eigensolver : %s\n", lis_esolvername[nesolver]); #endif } if( A->my_rank==0 ) { #ifdef _LONG__DOUBLE if ( output ) printf("convergence condition : ||lx-Ax||_2 <= %6.1Le * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]); #else if ( output ) printf("convergence condition : ||lx-Ax||_2 <= %6.1e * ||lx||_2\n", esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]); #endif } if( A->my_rank==0 ) { if( A->matrix_type==LIS_MATRIX_BSR || A->matrix_type==LIS_MATRIX_BSC ) { #ifdef _LONG__LONG if ( output ) printf("matrix storage format : %s(%lld x %lld)\n", lis_estoragename[A->matrix_type-1],eblock,eblock); #else if ( output ) printf("matrix storage format : %s(%d x %d)\n", lis_estoragename[A->matrix_type-1],eblock,eblock); #endif } else { if ( output ) printf("matrix storage format : %s\n", lis_estoragename[A->matrix_type-1]); } } time = lis_wtime(); esolver->ptime = 0; esolver->itime = 0; esolver->p_c_time = 0; esolver->p_i_time = 0; if (gshift != 0.0) lis_matrix_shift_diagonal(A, gshift); /* create work vector */ err = lis_esolver_malloc_work[nesolver](esolver); if( err ) { lis_vector_destroy(xx); esolver->retcode = err; return err; } esolver->x = xx; esolver->xx = x; /* execute esolver */ #ifndef USE_QUAD_PRECISION err = lis_esolver_execute[nesolver](esolver); #else if( eprecision==LIS_PRECISION_DOUBLE ) { err = lis_esolver_execute[nesolver](esolver); } else if( eprecision==LIS_PRECISION_QUAD ) { err = lis_esolver_execute_quad[nesolver](esolver); } else if( eprecision==LIS_PRECISION_SWITCH ) { err = lis_esolver_execute_switch[nesolver](esolver); } #endif esolver->retcode = err; *evalue0 = esolver->evalue[0]; lis_vector_copy(esolver->x, x); esolver->time = lis_wtime() - time; lis_matrix_shift_diagonal(A, -gshift); if( A->my_rank==0 ) { if( err ) { #ifdef _LONG__LONG if ( output ) printf("eigensolver status : %s(code=%lld)\n\n",lis_ereturncode[err],err); #else if ( output ) printf("eigensolver status : %s(code=%d)\n\n",lis_ereturncode[err],err); #endif } else { if ( output ) printf("eigensolver status : normal end\n\n"); } } if( eprecision==LIS_PRECISION_DOUBLE ) { esolver->iter2[mode] = esolver->iter[mode]; } else if( eprecision==LIS_PRECISION_QUAD ) { esolver->iter2[mode] = 0; } lis_vector_destroy(xx); LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_vector_dotex_mmm(LIS_VECTOR vx, LIS_VECTOR vy, LIS_QUAD_PTR *val) { LIS_INT i,n; LIS_SCALAR *x,*y,*xl,*yl; LIS_QUAD_PTR dotm2,dotm,tmpm; #ifdef _OPENMP LIS_INT is,ie,nprocs,my_rank; LIS_SCALAR *gt; #endif #ifdef USE_MPI MPI_Comm comm; #endif LIS_QUAD_DECLAR; LIS_DEBUG_FUNC_IN; n = vx->n; x = vx->value; y = vy->value; xl = vx->value_lo; yl = vy->value_lo; dotm2.hi = &vx->work[0]; dotm2.lo = &vx->work[2]; dotm.hi = &vx->work[8]; dotm.lo = &vx->work[9]; tmpm.hi = &vx->work[10]; tmpm.lo = &vx->work[11]; #ifndef NO_ERROR_CHECK if( n!=vy->n ) { LIS_SETERR(LIS_ERR_ILL_ARG,"length of vector x and y is not equal\n"); return LIS_ERR_ILL_ARG; } #endif #ifdef USE_MPI comm = vx->comm; #endif #ifdef _OPENMP gt = lis_vec_tmp; nprocs = omp_get_max_threads(); #ifndef USE_SSE2 #pragma omp parallel private(i,p1,p2,tq,bhi,blo,chi,clo,sh,th,sl,tl,eh,el,is,ie,my_rank) #else #pragma omp parallel private(i,bh,ch,sh,wh,th,bl,cl,sl,wl,tl,p1,p2,t0,t1,t2,eh,is,ie,my_rank) #endif { my_rank = omp_get_thread_num(); LIS_GET_ISIE(my_rank,nprocs,n,is,ie); #ifndef USE_FMA2_SSE2 gt[my_rank*LIS_VEC_TMP_PADD] = gt[my_rank*LIS_VEC_TMP_PADD+1] = 0.0; #pragma cdir nodep for(i=is;i<ie;i++) { LIS_QUAD_FMA(gt[my_rank*LIS_VEC_TMP_PADD],gt[my_rank*LIS_VEC_TMP_PADD+1],gt[my_rank*LIS_VEC_TMP_PADD],gt[my_rank*LIS_VEC_TMP_PADD+1],y[i],yl[i],x[i],xl[i]); } #else gt[my_rank*LIS_VEC_TMP_PADD ] = gt[my_rank*LIS_VEC_TMP_PADD+1] = 0.0; gt[my_rank*LIS_VEC_TMP_PADD+2] = gt[my_rank*LIS_VEC_TMP_PADD+3] = 0.0; #ifdef USE_VEC_COMP #pragma cdir nodep #endif for(i=is;i<ie-1;i+=2) { LIS_QUAD_FMA2_SSE2(gt[my_rank*LIS_VEC_TMP_PADD],gt[my_rank*LIS_VEC_TMP_PADD+2],gt[my_rank*LIS_VEC_TMP_PADD],gt[my_rank*LIS_VEC_TMP_PADD+2],y[i],yl[i],x[i],xl[i]); } LIS_QUAD_ADD_SSE2(gt[my_rank*LIS_VEC_TMP_PADD],gt[my_rank*LIS_VEC_TMP_PADD+1],gt[my_rank*LIS_VEC_TMP_PADD],gt[my_rank*LIS_VEC_TMP_PADD+2],gt[my_rank*LIS_VEC_TMP_PADD+1],gt[my_rank*LIS_VEC_TMP_PADD+3]); for(;i<ie;i++) { LIS_QUAD_FMA_SSE2(gt[my_rank*LIS_VEC_TMP_PADD],gt[my_rank*LIS_VEC_TMP_PADD+1],gt[my_rank*LIS_VEC_TMP_PADD],gt[my_rank*LIS_VEC_TMP_PADD+1],y[i],yl[i],x[i],xl[i]); } #endif } dotm.hi[0] = dotm.lo[0] = 0.0; for(i=0;i<nprocs;i++) { #ifndef USE_SSE2 LIS_QUAD_ADD(dotm.hi[0],dotm.lo[0],dotm.hi[0],dotm.lo[0],gt[i*LIS_VEC_TMP_PADD],gt[i*LIS_VEC_TMP_PADD+1]); #else LIS_QUAD_ADD_SSE2(dotm.hi[0],dotm.lo[0],dotm.hi[0],dotm.lo[0],gt[i*LIS_VEC_TMP_PADD],gt[i*LIS_VEC_TMP_PADD+1]); #endif } #else #ifndef USE_FMA2_SSE2 dotm.hi[0] = dotm.lo[0] = 0.0; #pragma cdir nodep for(i=0;i<n;i++) { LIS_QUAD_FMA(dotm.hi[0],dotm.lo[0],dotm.hi[0],dotm.lo[0],y[i],yl[i],x[i],xl[i]); } #else dotm2.hi[0] = dotm2.hi[1] = 0.0; dotm2.lo[0] = dotm2.lo[1] = 0.0; for(i=0;i<n-1;i+=2) { LIS_QUAD_FMA2_SSE2(dotm2.hi[0],dotm2.lo[0],dotm2.hi[0],dotm2.lo[0],y[i],yl[i],x[i],xl[i]); } LIS_QUAD_ADD_SSE2(dotm.hi[0],dotm.lo[0],dotm2.hi[0],dotm2.lo[0],dotm2.hi[1],dotm2.lo[1]); for(;i<n;i++) { LIS_QUAD_FMA_SSE2(dotm.hi[0],dotm.lo[0],dotm.hi[0],dotm.lo[0],y[i],yl[i],x[i],xl[i]); } #endif #endif #ifdef USE_MPI MPI_Allreduce(dotm.hi,tmpm.hi,1,LIS_MPI_MSCALAR,LIS_MPI_MSUM,comm); val->hi[0] = tmpm.hi[0]; val->lo[0] = tmpm.lo[0]; #else val->hi[0] = dotm.hi[0]; val->lo[0] = dotm.lo[0]; #endif LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }
LIS_INT lis_vector_dot(LIS_VECTOR vx, LIS_VECTOR vy, LIS_SCALAR *value) { LIS_INT i,n; LIS_SCALAR dot; LIS_SCALAR *x,*y; LIS_SCALAR tmp; #ifdef _OPENMP LIS_INT nprocs,my_rank; #endif #ifdef USE_MPI MPI_Comm comm; #endif LIS_DEBUG_FUNC_IN; n = vx->n; #ifndef NO_ERROR_CHECK if( n!=vy->n ) { LIS_SETERR(LIS_ERR_ILL_ARG,"length of vector x and y is not equal\n"); return LIS_ERR_ILL_ARG; } #endif x = vx->value; y = vy->value; #ifdef USE_MPI comm = vx->comm; #endif #ifdef _OPENMP nprocs = omp_get_max_threads(); #pragma omp parallel private(i,tmp,my_rank) { my_rank = omp_get_thread_num(); tmp = 0.0; #ifdef USE_VEC_COMP #pragma cdir nodep #endif #pragma omp for for(i=0; i<n; i++) { tmp += x[i]*y[i]; } lis_vec_tmp[my_rank*LIS_VEC_TMP_PADD] = tmp; } dot = 0.0; for(i=0;i<nprocs;i++) { dot += lis_vec_tmp[i*LIS_VEC_TMP_PADD]; } #else dot = 0.0; #ifdef USE_VEC_COMP #pragma cdir nodep #endif for(i=0; i<n; i++) { dot += x[i]*y[i]; } #endif #ifdef USE_MPI MPI_Allreduce(&dot,&tmp,1,MPI_DOUBLE,MPI_SUM,comm); *value = tmp; #else *value = dot; #endif LIS_DEBUG_FUNC_OUT; return LIS_SUCCESS; }