/**
 * Convenience overload: forwards the whole index range [0, length-1] of
 * `feld` to the four-argument quicksort overload.
 *
 * @param length number of elements in `feld`
 * @param feld   array handed to the range overload
 * @param median forwarded unchanged to the range overload
 *               (presumably selects a median-based pivot -- confirm there)
 */
void quicksort(uint length, uint *feld, bool median)
{
    // `length` is unsigned, so for length == 0 the expression `length - 1`
    // wraps around instead of becoming -1.  Fewer than two elements need no
    // sorting anyway, so stop here.
    if (length < 2) {
        return;
    }
    quicksort(0, length - 1, feld, median);
}
float KDTree::chooseCut(Object3d** p_objList, int p_objCount, BoundingBox* p_bbox, int p_axis) { float* list = (float *)malloc(sizeof(float) * p_objCount); float min, max; if(p_axis == KD_AXIS_X) { for(int i = 0; i < p_objCount; i++) { list[i] = p_objList[i]->getBoundingBox()->x0; } min = p_bbox->x0; max = p_bbox->x1; } if(p_axis == KD_AXIS_Y) { for(int i = 0; i < p_objCount; i++) { list[i] = p_objList[i]->getBoundingBox()->y0; } min = p_bbox->y0; max = p_bbox->y1; } if(p_axis == KD_AXIS_Z) { for(int i = 0; i < p_objCount; i++) { list[i] = p_objList[i]->getBoundingBox()->z0; } min = p_bbox->z0; max = p_bbox->z1; } quicksort(list, 0, p_objCount - 1); float median = list[p_objCount/2]; delete list; return median; /* BoundingBox* bbox = p_bbox; float cut = 0; if(p_axis == KD_AXIS_X) { float xd = bbox->x1 - bbox->x0; cut = bbox->x0 + xd * 0.5f; } if(p_axis == KD_AXIS_Y) { float yd = bbox->y1 - bbox->y0; cut = bbox->y0 + yd * 0.5f; } if(p_axis == KD_AXIS_Z) { float zd = bbox->z1 - bbox->z0; cut = bbox->z0 + zd * 0.5f; } return cut; */ /* int minIndex; Object3d* minObj; float minValue; Object3d* obj; float value; Object3d* objTmp; int halfArrayIndex = p_objCount / 2; for(int i = 0; i < halfArrayIndex; i++) { minIndex = i; minObj = p_objList[i]; if(p_axis == KD_AXIS_X) { minValue = minObj->getBoundingBox()->x0; } else if(p_axis == KD_AXIS_Y) { minValue = minObj->getBoundingBox()->y0; } else if(p_axis == KD_AXIS_Z) { minValue = minObj->getBoundingBox()->z0; } for(int j = i + 1; j < p_objCount; j++) { obj = p_objList[j]; if(p_axis == KD_AXIS_X) { value = obj->getBoundingBox()->x0; } else if(p_axis == KD_AXIS_Y) { value = obj->getBoundingBox()->y0; } else if(p_axis == KD_AXIS_Z) { value = obj->getBoundingBox()->z0; } if(value < minValue) { minIndex = j; minValue = value; } } if(minIndex != i) { objTmp = p_objList[minIndex]; p_objList[minIndex] = p_objList[i]; p_objList[i] = objTmp; } } return minValue; */ }
/**
 * Re-orders the particles stored in a series of binary particle-list files.
 *
 * Files are named "<indir>/<inbase>.<NNNN>.<postname>.lis" where NNNN runs
 * over the range given with -f.  Each file is read completely, an index
 * permutation is built with quicksort(cpuid, pid, order, ...), and the file is
 * rewritten in place with the particles in that order.
 *
 * Options: -d <indir>  -i <basename>  -s <post-name>  -f <f1:f2:fi>  -h
 *
 * File layout as read/written here: 12 floats of header, int ntype,
 * ntype floats, 2 floats of time, long n, then n records of
 * (7 floats, int property, long pid, int cpuid).
 *
 * sort_error() is treated as fatal: the code continues after each call as if
 * it never returns (NOTE(review): confirm it terminates the program).
 *
 * @return 0 on success
 */
int main(int argc, char* argv[])
{
  /* argument variables */
  int f1=0,f2=0,fi=1;
  char *defdir = ".";
  char *inbase = NULL, *postname = NULL;
  char *indir = defdir;
  char opt;

  /* file variables */
  FILE *fid;
  char fname[100];
  int len;

  /* data variables */
  int i,*cpuid,*property,ntype;
  long p,n,*pid,*order;
  float header[20],**data,time[2],*typeinfo=NULL;

  /* Read Arguments */
  for (i=1; i<argc; i++) {
    /* Only 2 character strings of the form "-?" are options */
    if (argv[i][0] != '-' || argv[i][1] == '\0' || argv[i][2] != '\0')
      continue;

    opt = argv[i][1];

    /* These options consume the next argument; make sure it exists so that
     * argv[++i] never steps past the end of argv. */
    if ((opt == 'd' || opt == 'i' || opt == 's' || opt == 'f') && i+1 >= argc)
      sort_error("Option -%c requires an argument!\n",opt);

    switch(opt) {
    case 'd':                                /* -d <indir> */
      indir = argv[++i];
      break;
    case 'i':                                /* -i <basename> */
      inbase = argv[++i];
      break;
    case 's':                                /* -s <post-name> */
      postname = argv[++i];
      break;
    case 'f':                                /* -f <# range(f1:f2:fi)> */
      sscanf(argv[++i],"%d:%d:%d",&f1,&f2,&fi);
      if (f2 == 0) f2 = f1;
      break;
    case 'h':                                /* -h */
      usage(argv[0]);
      break;
    default:
      usage(argv[0]);
      break;
    }
  }

  /* Checkpoints */
  if (inbase == NULL)
    sort_error("Please specify input file basename using -i option!\n");

  if (postname == NULL)
    sort_error("Please specify posterior file name using -s option!\n");

  if ((f1>f2) || (f2<0) || (fi<=0))
    sort_error("Wrong number sequence in the -f option!\n");

  /* ====================================================================== */

  for (i=f1; i<=f2; i+=fi) {
    fprintf(stderr,"Processing file number %d...\n",i);

    /* Step 1: Read the file */
    len = snprintf(fname,sizeof(fname),"%s/%s.%04d.%s.lis",indir,inbase,i,postname);
    if (len < 0 || (size_t)len >= sizeof(fname))
      sort_error("File name for file number %d is too long!\n",i);

    fid = fopen(fname,"rb");
    if (fid == NULL)
      sort_error("Fail to open input file %s!\n",fname);

    /* read header */
    if (fread(header,sizeof(float),12,fid) != 12 ||
        fread(&ntype,sizeof(int),1,fid) != 1 || ntype < 0)
      sort_error("Fail to read the header of file %s!\n",fname);

    /* typeinfo is sized per file: ntype is read from every file and is not
     * guaranteed to match the first one. */
    typeinfo = (float*)calloc_1d_array(ntype,sizeof(float));

    if (fread(typeinfo,sizeof(float),ntype,fid) != (size_t)ntype ||
        fread(time,sizeof(float),2,fid) != 2 ||
        fread(&n,sizeof(long),1,fid) != 1 || n < 0)
      sort_error("Fail to read the header of file %s!\n",fname);

    data     = (float**)calloc_2d_array(n,7,sizeof(float));
    property = (int*)calloc_1d_array(n,sizeof(int));
    pid      = (long*)calloc_1d_array(n,sizeof(long));
    cpuid    = (int*)calloc_1d_array(n,sizeof(int));
    order    = (long*)calloc_1d_array(n,sizeof(long));

    /* read data */
    for (p=0; p<n; p++) {
      if (fread(data[p],sizeof(float),7,fid) != 7 ||
          fread(&(property[p]),sizeof(int),1,fid) != 1 ||
          fread(&(pid[p]),sizeof(long),1,fid) != 1 ||
          fread(&(cpuid[p]),sizeof(int),1,fid) != 1)
        sort_error("File %s is truncated!\n",fname);
    }

    fclose(fid);

    /* Step 2: sort the particles */
    for (p=0; p<n; p++)
      order[p] = p;

    quicksort(cpuid, pid, order, 0, n-1);

    /* Step 3: write back the ordered particle list */
    fid = fopen(fname,"wb");
    if (fid == NULL)
      sort_error("Fail to open output file %s!\n",fname);

    /* write header */
    fwrite(header,sizeof(float),12,fid);
    fwrite(&ntype,sizeof(int),1,fid);
    fwrite(typeinfo,sizeof(float),ntype,fid);
    fwrite(time,sizeof(float),2,fid);
    fwrite(&n,sizeof(long),1,fid);

    /* write data: every per-particle field goes through the same permutation,
     * otherwise a property would end up attached to the wrong particle. */
    for (p=0; p<n; p++) {
      fwrite(data[order[p]],sizeof(float),7,fid);
      fwrite(&property[order[p]],sizeof(int),1,fid);
      fwrite(&pid[order[p]],sizeof(long),1,fid);
      fwrite(&cpuid[order[p]],sizeof(int),1,fid);
    }

    free_2d_array(data);
    free(property);
    free(pid);
    free(cpuid);
    free(order);
    free(typeinfo);
    typeinfo = NULL;

    if (fclose(fid) != 0)
      sort_error("Fail to write output file %s!\n",fname);
  }

  return 0;
}
/**
 * quick sort algorithm.
 *
 * Treats `a` as `length` consecutive elements of `size` bytes each and hands
 * the index range [0, length-1] to the internal quicksort() together with the
 * comparison callback.
 *
 * @param a      start of the array
 * @param length number of elements in the array
 * @param size   size of one element in bytes
 * @param comp   comparison callback forwarded to quicksort()
 *
 * @see cel_sort#cel_shell_sort
 */
CEL_API void cel_quick_sort(
    void *a, uint_t length, uint_t size, cel_compare_fn_t comp)
{
    uchar_t *arr = (uchar_t *) a;

    /* Nothing to sort for fewer than two elements.  This also keeps
     * `length - 1` from wrapping around for an empty array
     * (uint_t is presumably unsigned -- confirm). */
    if (arr == NULL || length < 2) {
        return;
    }

    quicksort(arr, size, comp, 0, length - 1);
}
/*
 * Entry point: reads vectors of `leng` values from `fp` (stdin, or the file
 * named on the command line) and writes statistics of them to stdout.
 *
 * Options handled below:
 *   -l L  vector length             -n N  vector length given as N + 1
 *   -t T  number of vectors per statistic (default -1: all input)
 *   -o O  output type (see the switch on outtype)
 *   -c C  confidence level          -d / -i / -r  toggle diagc / inv / corr
 *   -h    usage
 *
 * Depending on the flags the program writes the median, or, per group of
 * vectors, any of: mean, covariance matrix (or its inverse, or correlation
 * coefficients, or only the diagonal), and upper/lower confidence bounds.
 */
int main(int argc, char *argv[])
{
   FILE *fp = stdin;
   double *x, *mean, *med = NULL, **mtmp = NULL, **cov = NULL, **invcov =
       NULL, *var = NULL, conf = CONFLEV, *upper = NULL, *lower = NULL, t, err;
   int leng = LENG, nv = -1, i, j, k = 0, lp = 0, m, outtype = 0, count = 0;
   Boolean outmean = OUTMEAN, outcov = OUTCOV, outconf = OUTCONF, outmed =
       OUTMED, diagc = DIAGC, inv = INV, corr = CORR;

   /* cmnd = program name without its directory part (used in messages) */
   if ((cmnd = strrchr(argv[0], '/')) == NULL)
      cmnd = argv[0];
   else
      cmnd++;
   /* Arguments starting with '-' are options; anything else is the input file */
   while (--argc)
      if (**++argv == '-') {
         switch (*(*argv + 1)) {
         case 'l':
            leng = atoi(*++argv);
            --argc;
            break;
         case 'n':
            leng = atoi(*++argv) + 1;
            --argc;
            break;
         case 't':
            nv = atoi(*++argv);
            --argc;
            break;
         case 'o':
            outtype = atoi(*++argv);
            --argc;
            break;
         case 'c':
            conf = atof(*++argv);
            --argc;
            break;
         case 'd':
            diagc = 1 - diagc;
            break;
         case 'i':
            inv = 1 - inv;
            break;
         case 'r':
            corr = 1 - corr;
            break;
         case 'h':
            /* NOTE(review): no break -- falls into default unless usage()
             * terminates the program; confirm. */
            usage(0);
         default:
            fprintf(stderr, "%s : Invalid option '%c'!\n", cmnd, *(*argv + 1));
            usage(1);
         }
      } else
         fp = getfp(*argv, "rb");

   /* NOTE(review): the check accepts 0..100 but the message speaks of
    * "less than 1.0", and execution continues after the message. */
   if (conf < 0 || conf > 100) {
      fprintf(stderr,
              "%s : Confidence level must be greater than 0 and less than 1.0!\n",
              cmnd);
   }
   /* Output type: 1 = no covariance, 2 = no mean, 3 = mean + confidence
    * bounds, 4 = median only; any other value keeps the defaults. */
   switch (outtype) {
   case 1:
      outcov = FA;
      break;
   case 2:
      outmean = FA;
      break;
   case 3:
      outcov = FA;
      outconf = TR;
      break;
   case 4:
      outcov = FA;
      outmean = FA;
      outmed = TR;
      break;
   }
   /* Resolve conflicting matrix options: corr and inv override diagc,
    * and inv overrides corr. */
   if (diagc && corr)
      diagc = FA;
   if (diagc && inv)
      diagc = FA;
   if (corr && inv)
      corr = FA;

   /* One allocation holds both the mean (first half) and the input vector x */
   mean = dgetmem(leng + leng);

   x = mean + leng;

   /* ---- Median mode: collect the data, sort each dimension, pick the middle ---- */
   if (outmed) {

      if (nv == -1) {
         /* Number of vectors unknown: buffer the input in a linked list of
          * float vectors first; `top` is a dummy head node. */
         typedef struct _float_list {
            float *f;
            struct _float_list *next;
         } float_list;

         float_list *top = NULL, *prev = NULL, *cur = NULL;
         top = prev = (float_list *) getmem(1, sizeof(float_list));
         prev->next = NULL;
         while (freadf(x, sizeof(*x), leng, fp) == leng) {
            cur = (float_list *) getmem(1, sizeof(float_list));
            cur->f = fgetmem(leng);
            for (i = 0; i < leng; i++) {
               cur->f[i] = (float) x[i];
            }
            count++;
            prev->next = cur;
            cur->next = NULL;
            prev = cur;
         }
         k = count;
         /* mtmp is a leng x k matrix stored in one block: row i holds
          * dimension i of all k vectors. */
         mtmp = (double **) getmem(leng, sizeof(*mtmp));
         mtmp[0] = dgetmem(leng * k);
         for (i = 1; i < leng; i++)
            mtmp[i] = mtmp[i - 1] + k;

         med = dgetmem(leng);

         for (j = 0, cur = top->next; j < k; j++, cur = cur->next) {
            for (i = 0; i < leng; i++) {
               mtmp[i][j] = (double) cur->f[i];
            }
         }
         /* NOTE(review): the list nodes are not released here. */
      } else {
         /* Number of vectors known: read nv vectors straight into mtmp */
         k = nv;
         mtmp = (double **) getmem(leng, sizeof(*mtmp));
         mtmp[0] = dgetmem(leng * k);
         for (i = 1; i < leng; i++)
            mtmp[i] = mtmp[i - 1] + k;

         med = dgetmem(leng);

         for (j = 0; j < k; j++) {
            for (i = 0; i < leng; i++) {
               freadf(&mtmp[i][j], sizeof(**mtmp), 1, fp);
            }
         }
      }

      if (k % 2 == 0) {
         fprintf(stderr, "%s : warning: the number of vectors is even!\n",
                 cmnd);
      }

      /* Per dimension: sort the k samples; the median is the middle sample,
       * or the average of the two middle samples when k is even. */
      for (i = 0; i < leng; i++) {
         quicksort(mtmp[i], 0, k - 1);
         if (k % 2 == 1) {
            med[i] = mtmp[i][k / 2];
         } else {
            med[i] = ((mtmp[i][k / 2] + mtmp[i][k / 2 - 1]) / 2);
         }
      }

      fwritef(med, sizeof(*med), leng, stdout);

      return (0);
   }

   /* ---- Allocate the accumulators required by the selected outputs ---- */
   if (outcov) {
      if (!diagc) {
         cov = (double **) getmem(leng, sizeof(*cov));
         cov[0] = dgetmem(leng * leng);
         for (i = 1; i < leng; i++)
            cov[i] = cov[i - 1] + leng;

         if (inv) {
            invcov = (double **) getmem(leng, sizeof(*invcov));
            invcov[0] = dgetmem(leng * leng);
            for (i = 1; i < leng; i++)
               invcov[i] = invcov[i - 1] + leng;
         }
      } else
         var = dgetmem(leng);
   }
   if (outconf) {
      var = dgetmem(leng);
      upper = dgetmem(leng);
      lower = dgetmem(leng);
   }

   /* ---- Main loop: one pass per group of nv vectors (or one pass over the
    *      whole input when nv == -1) ---- */
   while (!feof(fp)) {
      /* reset accumulators */
      for (i = 0; i < leng; i++) {
         mean[i] = 0.0;
         if (outcov) {
            if (!diagc)
               for (j = 0; j < leng; j++)
                  cov[i][j] = 0.0;
            else
               var[i] = 0.0;
         }
         if (outconf) {
            var[i] = 0.0;
         }
      }

      /* Accumulate sums and (cross-)products.  lp counts down from nv; with
       * nv == -1 it keeps decreasing and never reaches 0, so the loop only
       * ends when the input is exhausted. */
      for (lp = nv; lp;) {
         if (freadf(x, sizeof(*x), leng, fp) != leng)
            break;
         for (i = 0; i < leng; i++) {
            mean[i] += x[i];
            if (outcov) {
               if (!diagc)
                  /* only the upper triangle is accumulated */
                  for (j = i; j < leng; j++)
                     cov[i][j] += x[i] * x[j];
               else
                  var[i] += x[i] * x[i];
            }
            if (outconf) {
               var[i] += x[i] * x[i];
            }
         }
         --lp;
      }

      /* Emit results for a complete group (lp == 0) or, in "all input" mode,
       * once the input has ended. */
      if (lp == 0 || nv == -1) {
         /* k = number of vectors accumulated: nv, or -lp - 1 when lp started
          * at -1 and was decremented once per vector read */
         if (nv > 0)
            k = nv;
         else
            k = -lp - 1;
         for (i = 0; i < leng; i++)
            mean[i] /= k;
         if (outcov) {
            if (!diagc)
               /* E[xy] - E[x]E[y], mirrored into the lower triangle */
               for (i = 0; i < leng; i++)
                  for (j = i; j < leng; j++)
                     cov[j][i] = cov[i][j] =
                         cov[i][j] / k - mean[i] * mean[j];
            else
               for (i = 0; i < leng; i++)
                  var[i] = var[i] / k - mean[i] * mean[i];
         }
         if (outconf) {
            /* variance with k - 1 in the denominator */
            for (i = 0; i < leng; i++) {
               var[i] = (var[i] - k * mean[i] * mean[i]) / (k - 1);
            }
            t = t_percent(conf / 100, k - 1);
            /* bounds are mean -/+ t * sqrt(var / k) */
            for (i = 0; i < leng; i++) {
               err = t * sqrt(var[i] / k);
               upper[i] = mean[i] + err;
               lower[i] = mean[i] - err;
            }
         }
         if (corr) {
            /* Normalise the covariance to correlation coefficients.
             * NOTE(review): cov is only allocated when outcov is set; with
             * corr enabled and outcov disabled this dereferences NULL. */
            for (i = 0; i < leng; i++)
               for (j = i + 1; j < leng; j++)
                  cov[j][i] = cov[i][j] =
                      cov[i][j] / sqrt(cov[i][i] * cov[j][j]);
            for (i = 0; i < leng; i++)
               cov[i][i] = 1.0;
         }
         if (outmean)
            fwritef(mean, sizeof(*mean), leng, stdout);

         if (outcov) {
            if (!diagc) {
               if (inv) {
                  /* In-place elimination on cov (no pivoting): afterwards the
                   * multipliers sit below the diagonal and the reduced rows
                   * on and above it. */
                  for (i = 0; i < leng; i++) {
                     for (j = i + 1; j < leng; j++) {
                        cov[j][i] /= cov[i][i];
                        for (m = i + 1; m < leng; m++)
                           cov[j][m] -= cov[i][m] * cov[j][i];
                     }
                  }
                  /* Solve for each column m of the inverse: start from the
                   * unit vector, forward-substitute, then back-substitute. */
                  for (m = 0; m < leng; m++) {
                     for (i = 0; i < leng; i++) {
                        if (i == m)
                           invcov[i][m] = 1.0;
                        else
                           invcov[i][m] = 0.0;
                     }
                     for (i = 0; i < leng; i++) {
                        for (j = i + 1; j < leng; j++)
                           invcov[j][m] -= invcov[i][m] * cov[j][i];
                     }
                     for (i = leng - 1; i >= 0; i--) {
                        for (j = i + 1; j < leng; j++)
                           invcov[i][m] -= cov[i][j] * invcov[j][m];
                        invcov[i][m] /= cov[i][i];
                     }
                  }
                  fwritef(invcov[0], sizeof(*invcov[0]), leng * leng, stdout);
               } else
                  fwritef(cov[0], sizeof(*cov[0]), leng * leng, stdout);
            } else
               fwritef(var, sizeof(*var), leng, stdout);
         }
         if (outconf) {
            fwritef(upper, sizeof(*upper), leng, stdout);
            fwritef(lower, sizeof(*lower), leng, stdout);
         }
      }
   }

   return (0);
}
/*
 * Rewinds `file` and reads ints from its start into vet[1], vet[2], ...
 * until the file ends or index `ultimo` has been filled.
 */
static void carregarVetor(FILE *file, int *vet, long long int ultimo)
{
    long long int i = 1;

    rewind(file);
    while (i <= ultimo && fread(&vet[i], sizeof(int), 1, file) == 1) {
        i++;
    }
}

/*
 * Writes the descending sequence tam-1, tam-2, ..., 1 to `nomeArquivo` as raw
 * ints, then times heapsort, quicksort and mergesort on that data and prints
 * the three timings.
 *
 * The file is re-read from its beginning before every sort so that each
 * algorithm gets the same (reversed) input; previously the file position
 * stayed at end-of-file after the first load, so the reload loops read nothing
 * and the later sorts ran on already sorted data.
 *
 * The array is used 1-based and quicksort/mergesort receive `tam` as upper
 * index, so tam + 1 zero-initialised slots are allocated: indices 0 and tam
 * are valid and defined even though only tam - 1 values come from the file.
 * NOTE(review): the index conventions of heapsort/quicksort/mergesort are not
 * visible here; the calls are kept exactly as before.
 */
void escreverEmArquivo(char *nomeArquivo)
{
    FILE *file;
    long long int i;
    int numero;
    int *vet;
    double inicio, fim, tempo, inicio2, fim2, tempo2, inicio3, fim3, tempo3;

    vet = calloc((size_t)tam + 1, sizeof(int));
    if (vet == NULL)
    {
        printf("erro ao alocar memoria\n");
        exit(EXIT_FAILURE);
    }

    file = fopen(nomeArquivo, "w+b");
    if (file == NULL)
    {
        printf("erro ao abrir arquivo\n");
        free(vet);
        exit(EXIT_FAILURE);
    }

    /* write the values in descending order */
    for (i = 1; i < tam; ++i)
    {
        numero = tam - i;
        fwrite(&numero, sizeof(int), 1, file);
    }

    /* heap sort */
    carregarVetor(file, vet, tam);
    inicio = clock();
    heapsort(vet, tam);
    fim = clock();
    tempo = (fim - inicio) / CLOCKS_PER_SEC;

    /* quick sort */
    carregarVetor(file, vet, tam);
    printf("\n\n");
    inicio2 = clock();
    quicksort(vet, 1, tam);
    fim2 = clock();
    tempo2 = (fim2 - inicio2) / CLOCKS_PER_SEC;

    /* merge sort */
    carregarVetor(file, vet, tam);
    printf("\n\n");
    inicio3 = clock();
    mergesort(vet, 1, tam);
    fim3 = clock();
    tempo3 = (fim3 - inicio3) / CLOCKS_PER_SEC;

    printf("\n\n");
    printf(" TEMPO HEAP: %4.4lf\n", tempo);
    printf(" TEMPO QUICK: %4.4lf\n", tempo2);
    printf(" TEMPO MERGE: %4.4lf\n", tempo3);

    fclose(file);
    free(vet);
}
/**
 * Convenience overload: passes the complete inclusive index range
 * [0, length-1] of `arr` on to the three-argument quicksort overload.
 *
 * @param arr    array handed to the range overload
 * @param length number of elements in `arr`
 */
void quicksort(int* arr, int length)
{
    int first = 0;
    int last = length - 1;

    quicksort(arr, first, last);
}
/*
 * Python method: nullScoreDistribution(seq, reps) -> list of float.
 *
 * Copies the integers of `seq` into a C array, then `reps` times shuffles the
 * array and scores it with scoreAllLong() against self->matrix.  All scores
 * (len - n + 1 per replicate, none when the sequence is shorter than n) are
 * sorted with compareDoubleDescending and returned as a new Python list.
 *
 * Returns NULL with a Python exception set on bad arguments, on conversion
 * errors of the sequence items, or when memory runs out.
 */
static PyObject* PositionWeightMatrix_nullScoreDistribution( PositionWeightMatrix* self, PyObject* args )
{
    PyObject* arg;
    PyObject* seq;
    PyObject* list;
    long* array;
    double* scores;
    int reps;
    size_t i;

    if( !PyArg_ParseTuple( args, "Oi", &arg, &reps ) ) return NULL;
    if( reps < 0 )
    {
        PyErr_SetString( PyExc_ValueError, "number of replicates must not be negative" );
        return NULL;
    }

    /* PySequence_Fast returns a NEW reference that has to be released */
    seq = PySequence_Fast( arg, "argument must be iterable" );
    if( !seq ) return NULL;
    PyObject** elems = PySequence_Fast_ITEMS( seq );
    size_t len = PySequence_Fast_GET_SIZE( seq );

    //allocate array to put elems in (at least one slot so that an empty
    //sequence does not turn into malloc(0))
    array = malloc( ( len ? len : 1 ) * sizeof( long ) );
    if( !array )
    {
        Py_DECREF( seq );
        return PyErr_NoMemory();
    }
    for( i = 0 ; i < len ; ++i )
    {
        array[i] = PyInt_AsLong( elems[i] );
    }
    //elems is borrowed from seq and must not be used past this point
    Py_DECREF( seq );

    //check for error
    if( PyErr_Occurred() )
    {
        free( array );
        return NULL;
    }

    double** matrix = self->matrix;
    int n = self->n;
    int m = self->m;

    //number of scored positions per replicate; a sequence shorter than the
    //motif has none (len - n + 1 would wrap around in unsigned arithmetic)
    size_t nmers = ( n >= 0 && len >= (size_t) n ) ? ( len - (size_t) n + 1 ) : 0;
    if( reps > 0 && nmers > ( (size_t) -1 / sizeof( double ) ) / (size_t) reps )
    {
        free( array );
        return PyErr_NoMemory();
    }
    size_t lenScores = nmers * (size_t) reps;

    //allocate scores array
    scores = malloc( ( lenScores ? lenScores : 1 ) * sizeof( double ) );
    if( !scores )
    {
        free( array );
        return PyErr_NoMemory();
    }

    if( lenScores > 0 )
    {
        //for each replicate, shuffle and score the sequence
        int rep = 0;
        while( rep < reps )
        {
            shuffle( array, len, sizeof( long ) );
            scoreAllLong( matrix, n, m, array, len, scores, nmers * rep );
            ++rep;
        }

        //sort the scores
        quicksort( scores, lenScores, sizeof( double ), &compareDoubleDescending );
    }

    //free the long array
    free( array );

    //build a python list to return
    list = PyList_New( (Py_ssize_t) lenScores );
    if( !list )
    {
        free( scores );
        return NULL;
    }
    for( i = 0 ; i < lenScores ; ++i )
    {
        PyObject* value = PyFloat_FromDouble( scores[i] );
        if( !value )
        {
            Py_DECREF( list );
            free( scores );
            return NULL;
        }
        //PyList_SetItem steals the reference to value
        PyList_SetItem( list, (Py_ssize_t) i, value );
    }

    //free the scores array
    free( scores );

    return list;
}
/*
 * Distributes the elements of the file `fd` into k bucket files named
 * "<base_name>_<i>", then handles each bucket according to its size and
 * recurses into the buckets that are larger than M.
 *
 * fd        - descriptor of the input; it is closed here and the file named
 *             by the global curr_name is removed once it has been distributed
 * size      - size of the input, passed to select_keys() and used for k
 * base_name - prefix of the bucket file names
 * t         - multiplier in k = t * min(ceildiv(size,M), ceildiv(M,B))
 *
 * I/O activity is counted in the global `results` (io_rand, io_acc).
 * Any failure to open, refill or flush prints a message and exits.
 */
void s_samplesort(int fd, off_t size,char *base_name, int t){
  struct queue_buf *buff;
  struct queue_buf **file_buff;
  int *files, *sizes, *keys;
  int i,j,cur,r,ret,ret1;
  char name_file[500];
  int k;

  /* number of buckets */
  k = t * min(ceildiv(size,M),ceildiv(M,B));
  r = 0;

  /* one input buffer created with M, plus per-bucket state */
  buff = qb_new(M);
  files = (int *)malloc(sizeof(int)*k);
  sizes = (int *)malloc(sizeof(int)*k);
  keys = (int *)malloc(sizeof(int)*(k+1));
  file_buff = (struct queue_buf **)malloc(sizeof(struct queue_buf *)*k);

  select_keys(keys,fd,size,k);

  /* Create one output buffer (created with B) and one file per bucket */
  for(i = 0; i < k; i++){
    file_buff[i] = qb_new(B);
    sizes[i] = 0;
    /* NOTE(review): the "\0" inside the format is redundant (sprintf stops
     * at it); name_file is a fixed 500-byte buffer. */
    sprintf(name_file, "%s_%d\0", base_name,i);
    if((files[i] = open(name_file, O_RDWR | O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO))== -1){
      printf("%s_%d fail1\n", base_name,i);
      perror("aca");
      exit(1);
    }
    results.io_rand++;
  }

  j=1;
  /* Distribution pass: refill the input buffer until it comes back empty */
  while(1){
    if((ret1 = qb_refill(buff,fd))== -1){
      printf("%s_%d fail2\n", base_name,i);
      perror("aca");
      exit(1);
    }
    results.io_rand++;
    results.io_acc+=ceildiv(buff->n_elems,B);
    if (qb_empty(buff)) break;
    while(!qb_empty(buff)){
      cur = qb_dequeue(buff);
      /* bucket index chosen by bucket() from the k+1 keys */
      i = bucket(cur,keys,k+1);
      qb_enqueue(file_buff[i], cur);
      /* a full bucket buffer is flushed to its file and added to the size */
      if(qb_full(file_buff[i])){
        results.io_rand++;
        results.io_acc+=ceildiv(file_buff[i]->n_elems,B);
        sizes[i] += file_buff[i]->n_elems;
        if((ret = qb_flush(file_buff[i],files[i]))== -1){
          printf("%s_%d fail3\n", base_name,i);
          perror("aca");
          exit(1);
        }
      }
    }
  }
  /* the input is fully distributed: close it and remove the file curr_name */
  close(fd);
  remove(curr_name);

  /* Leftovers may remain in the buffers. Everything has to be flushed.
   * NOTE(review): unlike above, the result of qb_flush is not checked here. */
  for (i = 0; i < k; i++) {
    if (!qb_empty(file_buff[i])) {
      results.io_rand++;
      results.io_acc+=ceildiv(file_buff[i]->n_elems,B);
      sizes[i] += file_buff[i]->n_elems;
      qb_flush(file_buff[i], files[i]);
    }
    close(files[i]);
    qb_free(file_buff[i]);
  }
  free(file_buff);
  free(keys);

  /* Buckets of at most M elements: empty ones are simply removed; the others
   * are loaded into buff, sorted with quicksort(), written back from offset 0
   * and then the bucket file is removed as well. */
  for(i = 0; i < k; i++){
    sprintf(name_file, "%s_%d\0", base_name,i);
    if(sizes[i] == 0){
      remove(name_file);
    }
    else if(sizes[i] <= M){
      if((files[i] = open(name_file, O_RDWR | O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO))== -1){
        printf("%s_%d fail1\n", base_name,i);
        perror("aca");
        exit(1);
      }
      if((ret1 = qb_refill(buff,files[i]))== -1){
        printf("%s_%d fail2\n", base_name,i);
        perror("aca");
        exit(1);
      }
      results.io_rand++;
      results.io_acc+=ceildiv(buff->n_elems,B);
      quicksort(buff);
      lseek64(files[i],0,SEEK_SET);
      results.io_rand++;
      qb_flush(buff,files[i]);
      results.io_rand++;
      results.io_acc+=ceildiv(buff->n_elems,B);
      close(files[i]);
      remove(name_file);
    }
  }
  qb_free(buff);

  /* Buckets larger than M: reopen and recurse with the bucket's own file name
   * as the new prefix.  curr_name points at this frame's name_file, which is
   * still alive while the recursive call runs. */
  for (i = 0; i < k; i++) {
    sprintf(name_file, "%s_%d\0", base_name,i);
    if (sizes[i] > M) {
      if((files[i] = open(name_file, O_RDWR | O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO))== -1){
        printf("%s_%d fail1\n",base_name, i);
        perror("aca");
        exit(1);
      }
      results.io_rand++;
      curr_name= name_file;
      s_samplesort(files[i],sizes[i],name_file,t);
    }
  }
  free(sizes);
  free(files);
}
/*
 * Reads t test cases from stdin.  Each case gives the number of stalls, the
 * number of cows and the stall positions; for each case the largest distance
 * d is printed such that the cows can be put into stalls with every two
 * consecutive occupied stalls at least d apart.
 *
 * The positions are sorted, then d is found by binary search: a candidate
 * distance is feasible when a greedy left-to-right scan can pick at least
 * no_cows stalls.
 *
 * Returns 0 on success, 1 on malformed input or when memory runs out.
 */
int main(void)
{
    const int MAX_STALLS = 100001;
    int t;
    int status = 0;
    long long int *stall_positions;

    stall_positions = malloc(MAX_STALLS * sizeof(*stall_positions));
    if (stall_positions == NULL) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }

    if (scanf("%d\n", &t) != 1) {
        free(stall_positions);
        return 1;
    }

    while (t-- > 0) {
        long int no_stalls, no_cows, i;

        /* The buffer holds MAX_STALLS positions and the code below indexes
         * stall_positions[no_stalls-1], so the count must be in range. */
        if (scanf("%ld %ld\n", &no_stalls, &no_cows) != 2
            || no_stalls < 1 || no_stalls > MAX_STALLS) {
            status = 1;
            break;
        }

        for (i = 0; i < no_stalls; i++) {
            if (scanf("%lld\n", stall_positions + i) != 1)
                break;
        }
        if (i < no_stalls) {
            status = 1;
            break;
        }

        quicksort(stall_positions, 0, no_stalls - 1);

        long long int max_distance, start, distance, end;
        /* search interval [start, end): end is one past the largest gap */
        start = 0;
        end = stall_positions[no_stalls - 1] - stall_positions[0] + 1;
        max_distance = 0;

        while (start < end) {
            distance = (end + start) / 2;

            /* greedily count stalls that are at least `distance` away from
             * the previously chosen one */
            long int count = 1;
            long long int prev = stall_positions[0];
            for (i = 1; i < no_stalls; i++) {
                if (stall_positions[i] - prev >= distance) {
                    prev = stall_positions[i];
                    count += 1;
                }
            }

            if (count >= no_cows) {
                /* all cows fit with this spacing: remember it, try larger */
                if (distance > max_distance)
                    max_distance = distance;
                start = distance + 1;
            } else {
                end = distance;
            }
        }

        printf("%lld\n", max_distance);
    }

    free(stall_positions);
    return status;
}
int main(int argc, char* argv[]) { int* bodies_off; int* n_bodies_split; int n_local_bodies; const MPI_Comm comm = MPI_COMM_WORLD; FILE *inputf; FILE *outputf; double clockStart, clockEnd; int rc, n_proc, rank; rc = MPI_Init(&argc, &argv); if (rc != MPI_SUCCESS) { puts("MPI_Init failed"); exit(-1); } MPI_Comm_size(comm, &n_proc); MPI_Comm_rank(comm, &rank); //creazione datatype per mpi! MPI_Datatype bodytype; MPI_Datatype type[6] = { MPI_LB, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_UB }; int block_len[6] = {1, 1, 3, 3, 3, 1}; MPI_Aint disp[6]; leaf_t example[2]; MPI_Get_address(&example[0], &disp[0]); MPI_Get_address(&(example[0].mass), &disp[1]); MPI_Get_address(&(example[0].pos), &disp[2]); MPI_Get_address(&(example[0].vel), &disp[3]); MPI_Get_address(&(example[0].acc), &disp[4]); MPI_Get_address(&(example[1].acc), &disp[5]); // int i; // for(i = 6; i >= 0; --i) // disp[i] -= disp[0]; disp[1] = disp[1] - disp[0]; disp[2] = disp[2] - disp[0]; disp[3] = disp[3] - disp[0]; disp[4] = disp[4] - disp[0]; disp[5] = disp[5] - disp[0]; MPI_Type_create_struct(6, block_len, disp, type, &bodytype); MPI_Type_commit(&bodytype); bodies_off = malloc((n_proc + 1) * sizeof(int)); n_bodies_split = malloc((n_proc) * sizeof(int)); bodies = malloc(nbodies * sizeof(node_t*)); leafs = malloc(nbodies * sizeof(leaf_t)); char* inputfile = argv[1]; inputf = fopen(inputfile, "r"); if (inputf == NULL) { printf("impossibile leggere da file"); exit(1); } fscanf(inputf, "%d", &nbodies); fscanf(inputf, "%d", &steps); fscanf(inputf, "%lf", &dt); fscanf(inputf, "%lf", &eps); fscanf(inputf, "%lf", &tol); fclose(inputf); if (rank == 0) { int i; create_bodies(); quicksort(0, nbodies - 1); // bublesort(); // int i = 0; // for (i = 0; i < nbodies; i++) { // printf("%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], // bodies[i]->pos[2]); // } n_local_bodies = nbodies / n_proc; //split delle particelle secondo shark & fish // split_bodies(n_proc, bodies_off, n_bodies_split); // 
n_local_bodies = n_bodies_split[rank]; // // MPI_Bcast(n_bodies_split, n_proc, MPI_INT, 0, comm); MPI_Bcast(leafs, nbodies, bodytype, 0, comm); dthf = 0.5 * dt; epssq = eps * eps; itolsq = 1.0 / (tol * tol); clockStart = MPI_Wtime(); int step = 0; root = NULL; for (step = 0; step < steps; step++) { compute_center_and_diameter(); root = malloc(sizeof(struct node_t)); // "new" is like "malloc" double mass_root = 0.0; root->type = 1; root->mass = &mass_root; root->pos = center; root->cell.childs[0] = NULL; root->cell.childs[1] = NULL; root->cell.childs[2] = NULL; root->cell.childs[3] = NULL; root->cell.childs[4] = NULL; root->cell.childs[5] = NULL; root->cell.childs[6] = NULL; root->cell.childs[7] = NULL; double radius = diameter * 0.5; int i = 0; for (i = 0; i < nbodies; i++) { insert(root, bodies[i], radius); // questo è il modo per passare i dati per riferimento... cioè mandare l'indirizzo della struttura puntata dal puntatore } curr = 0; compute_center_of_mass(&(*root)); for (i = 0; i < n_local_bodies; i++) { compute_force(&(*root), &(*bodies[i]), diameter, step); } // for (i = 0; i < nbodies; i++) { // } deallocate_tree(root); //inserire all gather MPI_Allgather(leafs, n_local_bodies, bodytype, leafs, n_local_bodies, bodytype, comm); for (i = 0; i < nbodies; i++) { advance(&(*bodies[i])); } // int p = 0; // for (p = 0; p < nbodies; p++) // printf("%lf, %lf, %lf \n", bodies[p]->pos[0], bodies[p]->pos[1], // bodies[p]->pos[2]); // printf("*************************************** \n"); } // int i = 0; // dopo l'esecuzione!! 
// int proc_rec = 1; // while (proc_rec < n_proc) { // MPI_Status status; // int proc_rank; // int cap = nbodies / n_proc; // node_t temp[cap]; // MPI_Recv(temp, cap, bodytype, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, // &status); // proc_rank = status.MPI_SOURCE; // // int idx = 0; // for (idx = proc_rec * (cap); idx < cap; idx++) // *bodies[idx] = temp[idx]; // proc_rec++; // } clockEnd = MPI_Wtime(); if (nbodies == 16384) { system("echo 'Host:' `hostname` >> output16384 "); outputf = fopen("output16384", "a"); fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd - clockStart); for (i = 0; i < nbodies; i++) { fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], bodies[i]->pos[2]); } } else if (nbodies == 32768) { system("echo 'Host:' `hostname` >> output32768 "); outputf = fopen("output32768", "a"); fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd - clockStart); for (i = 0; i < nbodies; i++) { fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], bodies[i]->pos[2]); } } else if (nbodies == 65536) { system("echo 'Host:' `hostname` >> output65536 "); outputf = fopen("output65536", "a"); fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd - clockStart); for (i = 0; i < nbodies; i++) { fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], bodies[i]->pos[2]); } } else { system("echo 'Host:' `hostname` >> output "); outputf = fopen("output", "a"); fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd - clockStart); for (i = 0; i < nbodies; i++) { fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], bodies[i]->pos[2]); } } fflush(outputf); fclose(outputf); printf("Esecuzione completata\n"); } else { int low = 1, up = 0; int i; dthf = 0.5 * dt; epssq = eps * eps; itolsq = 1.0 / (tol * tol); // if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) { // printf("Inizializzazione della libreria di papi fallita \n"); // exit(1); // } // // if 
(PAPI_create_eventset(&event_set) != PAPI_OK) { // printf("E' andata a male la creazione dell'eventSet \n"); // exit(1); // } // // if (PAPI_add_events(event_set, events, 2) != PAPI_OK) { // printf("E' andata a male l'aggiunta degli eventi\n"); // exit(1); // } n_local_bodies = nbodies / n_proc; MPI_Bcast(leafs, nbodies, bodytype, 0, comm); int step = 0; root = NULL; low += (rank * n_local_bodies); up = low + n_local_bodies; // PAPI_start(event_set); // clockStart = PAPI_get_real_usec(); for (step = 0; step < steps; step++) { compute_center_and_diameter(); root = malloc(sizeof(struct node_t)); // "new" is like "malloc" root->type = 1; *(root->mass) = 0.0; root->pos = center; root->cell.childs[0] = NULL; root->cell.childs[1] = NULL; root->cell.childs[2] = NULL; root->cell.childs[3] = NULL; root->cell.childs[4] = NULL; root->cell.childs[5] = NULL; root->cell.childs[6] = NULL; root->cell.childs[7] = NULL; double radius = diameter * 0.5; for (i = 0; i < nbodies; i++) { bodies[i] = malloc(sizeof(node_t)); bodies[i]->cell.leaf = &leafs[i]; bodies[i]->mass = &leafs[i].mass; bodies[i]->pos = leafs[i].pos; insert(&(*root), &(*bodies[i]), radius); // questo è il modo per passare i dati per riferimento... 
cioè mandare l'indirizzo della struttura puntata dal puntatore } curr = 0; compute_center_of_mass(&(*root)); for (i = low; i < up; i++) { compute_force(&(*root), &(*bodies[i]), diameter, step); } // for (i = 0; i < nbodies; i++) { // } deallocate_tree(root); local_leafs = &leafs[low]; //inserire all_gather MPI_Allgather(local_leafs, up - low, bodytype, leafs, up - low, bodytype, comm); for (i = 0; i < nbodies; i++) { advance(&(*bodies[i])); } // int p = 0; // for (p = 0; p < nbodies; p++) // printf("%lf, %lf, %lf \n", bodies[p]->pos[0], bodies[p]->pos[1], // bodies[p]->pos[2]); // printf("*************************************** \n"); } // clockEnd = PAPI_get_real_usec(); // PAPI_stop(event_set, values); // int i = 0; // MPI_Send(bodies[low], up - low + 1, bodytype, 0, MPI_ANY_TAG, comm); } MPI_Finalize(); return 0; }
// ----------------------------------------------------------------------------- // Hauptprogramm int main(int argc, char *argv[]) { float *v; // Feld int iter; // Wiederholungen if (argc != 2) { // Benutzungshinweis printf ("Vector sorting\nUsage: %s <NumIter>\n", argv[0]); return 0; } iter = atoi(argv[1]); v = (float *) calloc(NUM, sizeof(float)); // Speicher reservieren // seriell printf("Perform vector sorting %d times...\n", iter); double start = omp_get_wtime(); for (int i = 0; i < iter; i++) { // Wiederhole das Sortieren for (int j = 0; j < NUM; j++) // Mit Zufallszahlen initialisieren v[j] = (float)rand(); quicksort(v, 0, NUM-1); // Sortierung } double s_t = omp_get_wtime() - start; printf ("Done. This task took %f seconds.\n", s_t); if (is_sorted(v)) printf ("The vector is sorted correctly.\n"); else printf ("The vector is NOT sorted correctly.\n"); printf ("------------------------------------------\n"); // parallel V1 start = omp_get_wtime(); for (int i = 0; i < iter; i++) { // Wiederhole das Sortieren for (int j = 0; j < NUM; j++) // Mit Zufallszahlen initialisieren v[j] = (float)rand(); quicksortP1(v, 0, NUM-1); // Sortierung } double p1_t = omp_get_wtime() - start; printf ("Parallel with sections done. This task took %f seconds.\n", p1_t); if (is_sorted(v)){ printf ("The vector is sorted correctly.\n"); printf ("Speedup: %f\n", s_t/p1_t); // AUFGABE 3: Berechne Speedup } else { printf ("The vector is NOT sorted correctly.\n"); } printf ("------------------------------------------\n"); // parallel V2 start = omp_get_wtime(); for (int i = 0; i < iter; i++) { // Wiederhole das Sortieren for (int j = 0; j < NUM; j++) // Mit Zufallszahlen initialisieren v[j] = (float)rand(); #pragma omp parallel { #pragma omp single nowait quicksortP2(v, 0, NUM-1); } // Sortierung } double p2_t = omp_get_wtime() - start; printf ("Parallel with tasks done. 
This task took %f seconds.\n", p2_t); if (is_sorted(v)) { printf ("The vector is sorted correctly.\n"); printf ("Speedup: %f\n", s_t/p2_t); // AUFGABE 3: Berechne Speedup } else { printf ("The vector is NOT sorted correctly.\n"); } printf ("------------------------------------------\n"); // parallel V3 start = omp_get_wtime(); for (int i = 0; i < iter; i++) { // Wiederhole das Sortieren for (int j = 0; j < NUM; j++) // Mit Zufallszahlen initialisieren v[j] = (float)rand(); quicksortP3(v, 0, NUM-1); } double p3_t = omp_get_wtime() - start; printf ("Parallel with tasks done. This task took %f seconds.\n", p3_t); if (is_sorted(v)) { printf ("The vector is sorted correctly.\n"); printf ("Speedup: %f\n", s_t/p3_t); // AUFGABE 3: Berechne Speedup } else { printf ("The vector is NOT sorted correctly.\n"); } printf ("------------------------------------------\n"); return 0; }
double SearchLambda(int *nn,double *eta,int len) { int i,j,k,ind[2]; int *preindex=malloc(sizeof(int)*len),*index=malloc(sizeof(int)*len); double step,*val=malloc(sizeof(double)*len),pre1,pre2,*val_c=malloc(sizeof(double)*len),del=0; double *preval=malloc(sizeof(double)*(1+len)); memcpy(preval,eta,sizeof(double)*(len+1)); for(i=0; i<len; i++) { val[i]=psi(nn[i]+eta[i+1])-psi(eta[i+1]); val_c[i]=val[i]; index[i]=i; del-=val[i]; } quicksort(val_c,index,0,len-1); //coarse to fine for(k=0;k<8;k++){ step=S_STEP*(int)pow(10,7-k); //for(k=0;k<7;k++){ //step=S_STEP*(int)pow(10,6-k); memset(preindex,0,len*sizeof(int)); while(samevector(preindex,index,len)==0) { memcpy(preindex,index,sizeof(int)*len); for(i=0; i<(int)len/2; i++) { ind[0]=index[i]; ind[1]=index[len-1-i]; index[i]=i; index[len-1-i]=len-1-i; /*for all pairs */ //printf("aaa %d,%d,%f,%f,%f,%f\n ",ind[0],ind[1],eta[1+ind[0]],eta[1+ind[1]],val[ind[0]],val[ind[1]]); // //if((eta[1+ind[0]]<0 || eta[1+ind[1]]<0)){ //printf("input err: %d,%f,%f,%f\n", k,eta[1+ind[0]], eta[1+ind[1]],step); //} if(val[ind[0]]>val[ind[1]]){ printf("sort err: %d,%d,%f,%f,%f,%f,%f\n", k,i,val_c[i],val_c[len-i-1],val[ind[0]], val[ind[1]],step); printv1(len,index); //printv1(len,nn); printvector(len,val_c); printvector(len,val); } while(val[ind[0]]<val[ind[1]] && eta[1+ind[0]]-step>0) { pre1=val[ind[0]]; pre2=val[ind[1]]; eta[1+ind[0]]-= step; eta[1+ind[1]]+= step; val[ind[0]]=psi(eta[1+ind[0]]+nn[ind[0]])-psi(eta[1+ind[0]]); val[ind[1]]=psi(eta[1+ind[1]]+nn[ind[1]])-psi(eta[1+ind[1]]); //if(eta[1+ind[0]]>0){ //printf("gg %d,%d,%f,%f,%f,%f\n ",ind[0],ind[1],eta[1+ind[0]],eta[1+ind[1]],val[ind[0]],val[ind[1]]); //} // if( eta[1+ind[0]]<0 || eta[1+ind[1]]<0){ // printf("iteration err: %d,%f,%f,%f,%f,%f\n", k,val[ind[0]],val[ind[1]],eta[1+ind[0]], eta[1+ind[1]],step); // } } //be careful about the equal case... 
if(val[ind[0]]>val[ind[1]]) { /*one step back*/ eta[1+ind[0]]+= step; eta[1+ind[1]]-= step; //if( eta[1+ind[0]]<0 || eta[1+ind[1]]<0){ //printf("add back err: %d,%f,%f,%f,%f,%f,%f,%f\n", k,val[ind[0]],val[ind[1]],pre1,pre2,eta[1+ind[0]], eta[1+ind[1]],step); //} val[ind[0]]=pre1; val[ind[1]]=pre2; //printf("fff %d,%d,%f,%f,%f,%f\n ",ind[0],ind[1],eta[1+ind[0]],eta[1+ind[1]],val[ind[0]],val[ind[1]]); //if(val[ind[0]]>val[ind[1]]) { // printf("pre_wrong...%d,%f,%f\n",k,val[ind[0]],val[ind[1]]); //} } } if (len%2==1){ index[(len-1)/2]=(len-1)/2; } memcpy(val_c,val,sizeof(double)*len); quicksort(val_c,index,0,len-1); //printv1(len,index); //printv1(len,nn); //printvector(len+1,eta); } } //printvector(len,val); //printvector(len+1,eta); for(i=0; i<len; i++) { del+=val[i]; } free(preindex); free(index); free(val); free(val_c); return del; }
//Synarthsh quick sort gia na mporesei na thn xrhsimopoihsei h quick.c anadromika void quicksort(Record * x,int first,int last,int atr_num) { int pivot,j,i; Record temp; if(first<last) { pivot=first; i=first; j=last; while(i<j) { if (atr_num==0) { while(x[i].ssn<=x[pivot].ssn&&i<last) { i++; } while(x[j].ssn>x[pivot].ssn) { j--; } if(i<j) { anathesh(&temp,x[i]); anathesh(&(x[i]),x[j]); anathesh(&(x[j]),temp); } } else if(atr_num==1) { while((strcmp(x[i].FirstName,x[pivot].FirstName)<=0)&&i<last) { i++; } while(strcmp(x[j].FirstName,x[pivot].FirstName)>0) { j--; } if(i<j) { anathesh(&temp,x[i]); anathesh(&(x[i]),x[j]); anathesh(&(x[j]),temp); } } else if(atr_num==2) { while((strcmp(x[i].LastName,x[pivot].LastName)<=0)&&i<last) { i++; } while(strcmp(x[j].LastName,x[pivot].LastName)>0) { j--; } if(i<j) { anathesh(&temp,x[i]); anathesh(&(x[i]),x[j]); anathesh(&(x[j]),temp); } } else if(atr_num==3) { while(x[i].income<=x[pivot].income&&i<last) { i++; } while(x[j].income>x[pivot].income) { j--; } if(i<j) { anathesh(&temp,x[i]); anathesh(&(x[i]),x[j]); anathesh(&(x[j]),temp); } } } anathesh(&temp,x[pivot]); anathesh(&(x[pivot]),x[j]); anathesh(&(x[j]),temp); quicksort(x,first,j-1,atr_num); quicksort(x,j+1,last,atr_num); } }
/**
 * Entry point of the sorter: hands the whole array to quicksort() as the
 * inclusive index range 0 .. size-1.
 *
 * @param array  elements to reorder in place
 * @param size   number of elements in array
 */
void QuickSort::sort(int array[], int size)
{
    int lastIndex = size - 1;

    quicksort(array, 0, lastIndex);
}
int main() { // Initialize RNG dsfmt_gv_init_gen_rand(0); double t, tmin; // fib(20) assert(fib(20) == 6765); int f = 0; tmin = INFINITY; volatile int fibarg = 20; // prevent constant propagation for (int i=0; i<NITER; ++i) { t = clock_now(); for (int j = 0; j < 1000; j++) f += fib(fibarg); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("fib", tmin / 1000); // parse_bin tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); char s[11]; for (int k=0; k<1000 * 100; ++k) { uint32_t n = dsfmt_gv_genrand_uint32(); sprintf(s, "%x", n); uint32_t m = (uint32_t)parse_int(s, 16); assert(m == n); } t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("parse_int", tmin / 100); // // array constructor // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *a = ones(200,200); // free(a); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // print_perf("ones", tmin); // // // A*A' // //SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) // double *b = ones(200, 200); // tmin = INFINITY; // for (int i=0; i<NITER; ++i) { // t = clock_now(); // double *c = matmul_aat(200, b); // free(c); // t = clock_now()-t; // if (t < tmin) tmin = t; // } // free(b); // print_perf("AtA", tmin); // mandel /* The initialization on the next line is deliberately volatile to * prevent gcc from optimizing away the entire loop. 
* (First observed in gcc 4.9.2) */ static volatile int mandel_sum_init = 0; int mandel_sum2 = mandel_sum_init; tmin = INFINITY; for (int i=0; i<NITER; ++i) { int *M; t = clock_now(); for (int j = 0; j < 100; j++) { M = mandelperf(); if (j == 0) { int mandel_sum = 0; // for (int ii = 0; ii < 21; ii++) { // for (int jj = 0; jj < 26; jj++) { // printf("%4d", M[26*ii + jj]); // } // printf("\n"); // } for (int k = 0; k < 21*26; k++) { mandel_sum += M[k]; } assert(mandel_sum == 14791); mandel_sum2 += mandel_sum; } free(M); } t = clock_now()-t; if (t < tmin) tmin = t; } assert(mandel_sum2 == 14791 * NITER); print_perf("mandel", tmin / 100); // sort tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *d = myrand(5000); quicksort(d, 0, 5000-1); free(d); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("quicksort", tmin); // pi sum double pi; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); pi = pisum(); t = clock_now()-t; if (t < tmin) tmin = t; } assert(fabs(pi-1.644834071848065) < 1e-12); print_perf("pi_sum", tmin); // rand mat stat struct double_pair r; tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); r = randmatstat(1000); t = clock_now()-t; if (t < tmin) tmin = t; } // assert(0.5 < r.s1 && r.s1 < 1.0 && 0.5 < r.s2 && r.s2 < 1.0); print_perf("rand_mat_stat", tmin); // rand mat mul tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); double *C = randmatmul(1000); assert(0 <= C[0]); free(C); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("rand_mat_mul", tmin); // printfd tmin = INFINITY; for (int i=0; i<NITER; ++i) { t = clock_now(); printfd(100000); t = clock_now()-t; if (t < tmin) tmin = t; } print_perf("printfd", tmin); return 0; }
void *quicksort(void *arg) { int pivot; int i,j,tmp; pthread_t lthread, rthread; // used only for keeping initial field values struct qs qs_info; // the address of the struct is passed as a function parameter // and for this reason we dont want the same address in both qs calls struct qs *qs_left; struct qs *qs_right; struct qs *qs_temp; // we use heap memory instead of static to avoid loss of local data // before being read from their callees qs_left = (struct qs *)malloc(sizeof(struct qs)); qs_right = (struct qs *)malloc(sizeof(struct qs)); // intializing struct fields qs_temp = (struct qs *)arg; qs_info.left = qs_temp->left; qs_info.right = qs_temp->right; qs_info.curr_layer = qs_temp->curr_layer; qs_left->curr_layer = qs_temp->curr_layer; qs_right->curr_layer = qs_temp->curr_layer; qs_info.max_layer = qs_temp->max_layer; qs_left->max_layer = qs_temp->max_layer; qs_right->max_layer = qs_temp->max_layer; free((struct qs *)arg); trace(&qs_info); // pivot calculation if (qs_info.left < qs_info.right) { pivot = qs_info.left; i = qs_info.left; j = qs_info.right; while(i<j) { while((a[i]<=a[pivot]) && (i < qs_info.right)) i++; while(a[j] > a[pivot]) j--; if (i<j){ tmp = a[i]; a[i] = a[j]; a[j] = tmp; } } tmp = a[pivot]; a[pivot] = a[j]; a[j] = tmp; //decide wether to continue creating threads or not if (qs_info.curr_layer == qs_info.max_layer) { qs_left->left = qs_info.left; qs_left->right = j-1; quicksort(qs_left); qs_right->left = j+1; qs_right->right = qs_info.right; quicksort(qs_right); } else { qs_left->curr_layer++; qs_left->left = qs_info.left; qs_left->right = j-1; trace(qs_left); if(pthread_create(<hread, NULL, quicksort, qs_left)) { perror("pthread_create"); exit(1); } qs_right->curr_layer++; qs_right->left = j+1; qs_right->right = qs_info.right; trace(qs_right); if(pthread_create(&rthread, NULL, quicksort, qs_right)) { perror("pthread_create"); exit(1); } } } return(NULL); }
/*
 * Convenience wrapper: sorts the n elements of a by calling quicksort() on
 * the inclusive index range 0 .. n-1.
 *
 * Arrays with fewer than two elements are returned untouched.  Without that
 * guard n == 0 would compute n-1 on an unsigned size_t, which wraps around to
 * SIZE_MAX instead of producing -1.
 */
void qsort_wrapper(int a[], size_t n)
{
    if (n < 2) {
        return;
    }
    quicksort(a, 0, n-1);
}
/*
 * Sorts the items of a menu (alphabetically and indexing them, per the
 * original author's note; that behaviour lives in quicksort(), not here).
 *
 * This function only forwards menulist->items together with the inclusive
 * index range 0 .. num_items-1 to quicksort().
 *
 * NOTE(review): menulist is dereferenced without a NULL check, and an empty
 * menu passes num_items-1 as the upper bound - confirm quicksort() copes.
 */
void menu_sort_menu(menu_st *menulist) { quicksort(menulist->items, 0, menulist->num_items-1); }
/*
 * Generic sort entry point: forwards the array a and the index bounds l and r
 * unchanged to quicksort().
 * NOTE(review): whether r is inclusive is decided by quicksort(); confirm
 * against its definition.
 */
void sort(Item a[], int l, int r) { quicksort(a, l, r); }
/*
 * ompcore - sparse-codes L signals with Orthogonal Matching Pursuit and
 * returns the coefficient matrix Gamma (m x L).
 *
 * Mode selection (see the status flags at the top of the body):
 *   G == 0    standard OMP: works from the dictionary D and keeps an explicit
 *             residual r.
 *   G != 0    Batch-OMP: works from G and never forms the residual.
 *   DtX == 0  D'*x is computed per signal into a locally allocated buffer,
 *             which is freed before returning.
 *   XtX == 0  with erroromp set, the initial residual norm is computed with
 *             dotprod() instead of being read from XtX.
 *
 * Parameters:
 *   D, x        dictionary (n values per atom) and signals (n values each)
 *   DtX, XtX, G optional precomputed inputs, 0 when absent (see above)
 *   n, m, L     signal length, number of atoms, number of signals
 *   T           maximum number of atoms per signal; with erroromp set, T < 0
 *               or T > n is replaced by n
 *   eps         error target, compared squared against the residual norm
 *               when erroromp is set
 *   gamma_mode  FULL_GAMMA yields a dense matrix, anything else a sparse one
 *   profile     non-zero prints profiling information at the end
 *   msg_delta   > 0 prints a progress line once at least msg_delta seconds
 *               have passed since the previous one
 *   erroromp    non-zero enables the residual-norm stopping criterion
 *
 * For each signal, atom selection stops when resnorm <= eps^2, when T atoms
 * were chosen, when the same atom is selected twice, when the largest squared
 * inner product is below 1e-14, or when the Cholesky update finds the
 * selected atoms dependent.
 *
 * Returns the mxArray Gamma; every helper buffer is released with mxFree.
 *
 * NOTE(review): ind and c hold n entries, and T is only clamped to n when
 * erroromp is set - presumably callers guarantee T <= n otherwise; confirm.
 */
mxArray* ompcore(double D[], double x[], double DtX[], double XtX[], double G[], mwSize n, mwSize m, mwSize L, int T, double eps, int gamma_mode, int profile, double msg_delta, int erroromp) { profdata pd; mxArray *Gamma; mwIndex i, j, signum, pos, *ind, *gammaIr, *gammaJc, gamma_count; mwSize allocated_coefs, allocated_cols; int DtX_specified, XtX_specified, batchomp, standardomp, *selected_atoms; double *alpha, *r, *Lchol, *c, *Gsub, *Dsub, sum, *gammaPr, *tempvec1, *tempvec2; double eps2, resnorm, delta, deltaprev, secs_remain; int mins_remain, hrs_remain; clock_t lastprint_time, starttime; /*** status flags ***/ DtX_specified = (DtX!=0); /* indicates whether D'*x was provided */ XtX_specified = (XtX!=0); /* indicates whether sum(x.*x) was provided */ standardomp = (G==0); /* batch-omp or standard omp are selected depending on availability of G */ batchomp = !standardomp; /*** allocate output matrix ***/ if (gamma_mode == FULL_GAMMA) { /* allocate full matrix of size m X L */ Gamma = mxCreateDoubleMatrix(m, L, mxREAL); gammaPr = mxGetPr(Gamma); gammaIr = 0; gammaJc = 0; } else { /* allocate sparse matrix with room for allocated_coefs nonzeros */ /* for error-omp, begin with L*sqrt(n)/2 allocated nonzeros, otherwise allocate L*T nonzeros */ allocated_coefs = erroromp ? (mwSize)(ceil(L*sqrt((double)n)/2.0) + 1.01) : L*T; Gamma = mxCreateSparse(m, L, allocated_coefs, mxREAL); gammaPr = mxGetPr(Gamma); gammaIr = mxGetIr(Gamma); gammaJc = mxGetJc(Gamma); gamma_count = 0; gammaJc[0] = 0; } /*** helper arrays ***/ alpha = (double*)mxMalloc(m*sizeof(double)); /* contains D'*residual */ ind = (mwIndex*)mxMalloc(n*sizeof(mwIndex)); /* indices of selected atoms */ selected_atoms = (int*)mxMalloc(m*sizeof(int)); /* binary array with 1's for selected atoms */ c = (double*)mxMalloc(n*sizeof(double)); /* orthogonal projection result */ /* current number of columns in Dsub / Gsub / Lchol */ allocated_cols = erroromp ? 
(mwSize)(ceil(sqrt((double)n)/2.0) + 1.01) : T; /* Cholesky decomposition of D_I'*D_I */ Lchol = (double*)mxMalloc(n*allocated_cols*sizeof(double)); /* temporary vectors for various computations */ tempvec1 = (double*)mxMalloc(m*sizeof(double)); tempvec2 = (double*)mxMalloc(m*sizeof(double)); if (batchomp) { /* matrix containing G(:,ind) - the columns of G corresponding to the selected atoms, in order of selection */ Gsub = (double*)mxMalloc(m*allocated_cols*sizeof(double)); } else { /* matrix containing D(:,ind) - the selected atoms from D, in order of selection */ Dsub = (double*)mxMalloc(n*allocated_cols*sizeof(double)); /* stores the residual */ r = (double*)mxMalloc(n*sizeof(double)); } if (!DtX_specified) { /* contains D'*x for the current signal */ DtX = (double*)mxMalloc(m*sizeof(double)); } /*** initializations for error omp ***/ if (erroromp) { eps2 = eps*eps; /* compute eps^2 */ if (T<0 || T>n) { /* unspecified max atom num - set max atoms to n */ T = n; } } /*** initialize timers ***/ initprofdata(&pd); /* initialize profiling counters */ starttime = clock(); /* record starting time for eta computations */ lastprint_time = starttime; /* time of last status display */ /********************** perform omp for each signal **********************/ for (signum=0; signum<L; ++signum) { /* compute DtX */ if (!DtX_specified) { matT_vec(1, D, x+n*signum, DtX, n, m); addproftime(&pd, DtX_TIME); } /* initialize alpha := DtX */ memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double)); /* mark all atoms as unselected */ for (i=0; i<m; ++i) { selected_atoms[i] = 0; } /* initialize residual norm and deltaprev for error-omp */ if (erroromp) { if (XtX_specified) { resnorm = XtX[signum]; } else { resnorm = dotprod(x+n*signum, x+n*signum, n); addproftime(&pd, XtX_TIME); } deltaprev = 0; /* delta tracks the value of gamma'*G*gamma */ } else { /* ignore residual norm stopping criterion */ eps2 = 0; resnorm = 1; } /* main loop */ i=0; while (resnorm>eps2 && i<T) { /* 
index of next atom */ pos = maxabs(alpha, m); addproftime(&pd, MAXABS_TIME); /* stop criterion: selected same atom twice, or inner product too small */ if (selected_atoms[pos] || alpha[pos]*alpha[pos]<1e-14) { break; } /* mark selected atom */ ind[i] = pos; selected_atoms[pos] = 1; /* matrix reallocation */ if (erroromp && i>=allocated_cols) { allocated_cols = (mwSize)(ceil(allocated_cols*MAT_INC_FACTOR) + 1.01); Lchol = (double*)mxRealloc(Lchol,n*allocated_cols*sizeof(double)); batchomp ? (Gsub = (double*)mxRealloc(Gsub,m*allocated_cols*sizeof(double))) : (Dsub = (double*)mxRealloc(Dsub,n*allocated_cols*sizeof(double))) ; } /* append column to Gsub or Dsub */ if (batchomp) { memcpy(Gsub+i*m, G+pos*m, m*sizeof(double)); } else { memcpy(Dsub+i*n, D+pos*n, n*sizeof(double)); } /*** Cholesky update ***/ if (i==0) { *Lchol = 1; } else { /* incremental Cholesky decomposition: compute next row of Lchol */ if (standardomp) { matT_vec(1, Dsub, D+n*pos, tempvec1, n, i); /* compute tempvec1 := Dsub'*d where d is new atom */ addproftime(&pd, DtD_TIME); } else { vec_assign(tempvec1, Gsub+i*m, ind, i); /* extract tempvec1 := Gsub(ind,i) */ } backsubst('L', Lchol, tempvec1, tempvec2, n, i); /* compute tempvec2 = Lchol \ tempvec1 */ for (j=0; j<i; ++j) { /* write tempvec2 to end of Lchol */ Lchol[j*n+i] = tempvec2[j]; } /* compute Lchol(i,i) */ sum = 0; for (j=0; j<i; ++j) { /* compute sum of squares of last row without Lchol(i,i) */ sum += SQR(Lchol[j*n+i]); } if ( (1-sum) <= 1e-14 ) { /* Lchol(i,i) is zero => selected atoms are dependent */ break; } Lchol[i*n+i] = sqrt(1-sum); } addproftime(&pd, LCHOL_TIME); i++; /* perform orthogonal projection and compute sparse coefficients */ vec_assign(tempvec1, DtX + m*signum*DtX_specified, ind, i); /* extract tempvec1 = DtX(ind) */ cholsolve('L', Lchol, tempvec1, c, n, i); /* solve LL'c = tempvec1 for c */ addproftime(&pd, COMPCOEF_TIME); /* update alpha = D'*residual */ if (standardomp) { mat_vec(-1, Dsub, c, r, n, i); /* compute r := 
-Dsub*c */ vec_sum(1, x+n*signum, r, n); /* compute r := x+r */ /*memcpy(r, x+n*signum, n*sizeof(double)); /* assign r := x */ /*mat_vec1(-1, Dsub, c, 1, r, n, i); /* compute r := r-Dsub*c */ addproftime(&pd, COMPRES_TIME); matT_vec(1, D, r, alpha, n, m); /* compute alpha := D'*r */ addproftime(&pd, DtR_TIME); /* update residual norm */ if (erroromp) { resnorm = dotprod(r, r, n); addproftime(&pd, UPDATE_RESNORM_TIME); } } else { mat_vec(1, Gsub, c, tempvec1, m, i); /* compute tempvec1 := Gsub*c */ memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double)); /* set alpha = D'*x */ vec_sum(-1, tempvec1, alpha, m); /* compute alpha := alpha - tempvec1 */ addproftime(&pd, UPDATE_DtR_TIME); /* update residual norm */ if (erroromp) { vec_assign(tempvec2, tempvec1, ind, i); /* assign tempvec2 := tempvec1(ind) */ delta = dotprod(c,tempvec2,i); /* compute c'*tempvec2 */ resnorm = resnorm - delta + deltaprev; /* residual norm update */ deltaprev = delta; addproftime(&pd, UPDATE_RESNORM_TIME); } } } /*** generate output vector gamma ***/ if (gamma_mode == FULL_GAMMA) { /* write the coefs in c to their correct positions in gamma */ for (j=0; j<i; ++j) { gammaPr[m*signum + ind[j]] = c[j]; } } else { /* sort the coefs by index before writing them to gamma */ quicksort(ind,c,i); addproftime(&pd, INDEXSORT_TIME); /* gamma is full - reallocate */ if (gamma_count+i >= allocated_coefs) { while(gamma_count+i >= allocated_coefs) { allocated_coefs = (mwSize)(ceil(GAMMA_INC_FACTOR*allocated_coefs) + 1.01); } mxSetNzmax(Gamma, allocated_coefs); mxSetPr(Gamma, mxRealloc(gammaPr, allocated_coefs*sizeof(double))); mxSetIr(Gamma, mxRealloc(gammaIr, allocated_coefs*sizeof(mwIndex))); gammaPr = mxGetPr(Gamma); gammaIr = mxGetIr(Gamma); } /* append coefs to gamma and update the indices */ for (j=0; j<i; ++j) { gammaPr[gamma_count] = c[j]; gammaIr[gamma_count] = ind[j]; gamma_count++; } gammaJc[signum+1] = gammaJc[signum] + i; } /*** display status messages ***/ if (msg_delta>0 && 
(clock()-lastprint_time)/(double)CLOCKS_PER_SEC >= msg_delta) { lastprint_time = clock(); /* estimated remainig time */ secs2hms( ((L-signum-1)/(double)(signum+1)) * ((lastprint_time-starttime)/(double)CLOCKS_PER_SEC) , &hrs_remain, &mins_remain, &secs_remain); mexPrintf("omp: signal %d / %d, estimated remaining time: %02d:%02d:%05.2f\n", signum+1, L, hrs_remain, mins_remain, secs_remain); mexEvalString("drawnow;"); } } /* end omp */ /*** print final messages ***/ if (msg_delta>0) { mexPrintf("omp: signal %d / %d\n", signum, L); } if (profile) { printprofinfo(&pd, erroromp, batchomp, L); } /* free memory */ if (!DtX_specified) { mxFree(DtX); } if (standardomp) { mxFree(r); mxFree(Dsub); } else { mxFree(Gsub); } mxFree(tempvec2); mxFree(tempvec1); mxFree(Lchol); mxFree(c); mxFree(selected_atoms); mxFree(ind); mxFree(alpha); return Gamma; }
/*
 * Recursive quicksort over a[l..r] (both bounds inclusive).
 *
 * a  array to reorder in place
 * l  index of the first element of the range
 * r  index of the last element of the range
 *
 * partition() is expected to rearrange a[l..r] and return the final index i
 * of the pivot; the two sides of that index are then sorted independently.
 */
void quicksort(int a[], int l, int r)
{
    int i;

    /*
     * Base case: a range with fewer than two elements is already sorted.
     * Without this check the recursion never terminates.
     */
    if (r <= l) {
        return;
    }

    i = partition(a, l, r);
    quicksort(a, l, i - 1);
    quicksort(a, i + 1, r);
}
/*
 * External merge-sort demo.
 *
 * 1. Writes 64*max random integers in 0..999, one per line, to "BIGFILE".
 * 2. Splits BIGFILE into 64 chunk files of max integers each; the chunk file
 *    name is the file-level buffer `name` with name[4] set to i+48.
 * 3. Reads every chunk back, sorts it in memory with quicksort() and rewrites
 *    it.
 * 4. Merges the chunks in three passes: 8-way into the `name2` files, 4-way
 *    into the `name3` files and 2-way into the `name4` file.  Each merge
 *    step emits the minimum of the head values (minpos()/minele()) and
 *    refills that slot from the stream it came from.
 *
 * NOTE(review): i+48 only yields a digit character for i <= 9; for larger i
 * other characters are produced - confirm the resulting file names are
 * intended.
 * NOTE(review): none of the merge input streams (f1..f8) is fclose'd, and no
 * fopen/fscanf result is checked.  When a stream is exhausted its slot in arr
 * keeps the last value read.
 * NOTE(review): in the 4-way pass f3 is rewound AFTER its first value was
 * read, and i is reset to 1 inside the loop, so every iteration opens the
 * same four name2 files - confirm.
 * NOTE(review): the 4-way and 2-way passes write 4*max and 2*max values,
 * although each of their input files was written with 8*max and 4*max values
 * respectively.
 */
void main() { int i,j,k,num,a[max]; fp1=fopen("BIGFILE","w"); for(i=0;i<64*max;i++) { num=rand() % 1000; fprintf(fp1,"%d\n",num); } fclose(fp1); fp1=fopen("BIGFILE","r"); rewind(fp1); for(i=1;i<=64;i++) { name[4]=i+48; fp2=fopen(name,"w"); for(j=0;j<max;j++) { fscanf(fp1,"%d",&num); fprintf(fp2,"%d\n",num); } fclose(fp2); } fclose(fp1); for(i=1;i<=64;i++) { name[4]=i+48; fp2=fopen(name,"r"); for(j=0;j<max;j++) { fscanf(fp2,"%d",&a[j]); } fclose(fp2); quicksort(a,0,max-1); fp2=fopen(name,"w"); for(j=0;j<max;j++) { fprintf(fp2,"%d\n",a[j]); } fclose(fp2); } i=1; for(k=0;k<8;k++) //8 way merge { name2[5]=k+48+1; FILE *f1,*f2,*f3,*f4,*f5,*f6,*f7,*f8,*f88; int a1,a2,a3,a4,a5,a6,a7,a8,arr[8],pos,min,z; name[4]=i+48; f1=fopen(name,"r"); rewind(f1); fscanf(f1,"%d",&a1); arr[0]=a1; i++; name[4]=i+48; f2=fopen(name,"r"); rewind(f2); fscanf(f2,"%d",&a2); arr[1]=a2; i++; name[4]=i+48; f3=fopen(name,"r"); rewind(f3); fscanf(f3,"%d",&a3); arr[2]=a3; i++; name[4]=i+48; f4=fopen(name,"r"); rewind(f4); fscanf(f4,"%d",&a4); arr[3]=a4; i++; name[4]=i+48; f5=fopen(name,"r"); rewind(f5); fscanf(f5,"%d",&a5); arr[4]=a5; i++; name[4]=i+48; f6=fopen(name,"r"); rewind(f6); fscanf(f6,"%d",&a6); arr[5]=a6; i++; name[4]=i+48; f7=fopen(name,"r"); rewind(f7); fscanf(f7,"%d",&a7); arr[6]=a7; i++; name[4]=i+48; f8=fopen(name,"r"); rewind(f8); fscanf(f8,"%d",&a8); arr[7]=a8; i++; f88=fopen(name2,"w"); for(z=0;z<8*max;z++) { pos=minpos(arr,8); min=minele(arr,8); printf("%d--%d ",pos,min); fprintf(f88,"%d\n",min); if(pos==0) fscanf(f1,"%d",&arr[0]); else if(pos==1) fscanf(f2,"%d",&arr[1]); else if(pos==2) fscanf(f3,"%d",&arr[2]); else if(pos==3) fscanf(f4,"%d",&arr[3]); else if(pos==4) fscanf(f5,"%d",&arr[4]); else if(pos==5) fscanf(f6,"%d",&arr[5]); else if(pos==6) fscanf(f7,"%d",&arr[6]); else if(pos==7) fscanf(f8,"%d",&arr[7]); } fclose(f88); } i=1; for(k=0;k<4;k++) //4 way merge { name3[5]=k+48+1; FILE *f1,*f2,*f3,*f4,*f44; int a1,a2,a3,a4,arr[4],pos,min,z; name2[5]=i+48; f1=fopen(name2,"r"); 
rewind(f1); fscanf(f1,"%d",&a1); arr[0]=a1; i++; name2[5]=i+48; f2=fopen(name2,"r"); rewind(f2); fscanf(f2,"%d",&a2); arr[1]=a2; i++; name2[5]=i+48; f3=fopen(name2,"r"); fscanf(f3,"%d",&a3); rewind(f3); arr[2]=a3; i++; name2[5]=i+48; f4=fopen(name2,"r"); rewind(f4); fscanf(f4,"%d",&a4); arr[3]=a4; i++; f44=fopen(name3,"w"); for(z=0;z<4*max;z++) { pos=minpos(arr,4); min=minele(arr,4); printf("%d--%d ",pos,min); fprintf(f44,"%d\n",min); if(pos==0) fscanf(f1,"%d",&arr[0]); else if(pos==1) fscanf(f2,"%d",&arr[1]); else if(pos==2) fscanf(f3,"%d",&arr[2]); else if(pos==3) fscanf(f4,"%d",&arr[3]); } fclose(f44); i=1; } i=1; for(k=0;k<1;k++) //2 way merge { name4[5]=k+48+1; FILE *f1,*f2,*f22; int a1,a2,arr[2],pos,min,z; name3[5]=i+48; f1=fopen(name3,"r"); rewind(f1); fscanf(f1,"%d",&a1); arr[0]=a1; i++; name3[5]=i+48; f2=fopen(name3,"r"); rewind(f2); fscanf(f2,"%d",&a2); arr[1]=a2; i++; f22=fopen(name4,"w"); for(z=0;z<2*max;z++) { pos=minpos(arr,2); min=minele(arr,2); printf("%d--%d ",pos,min); fprintf(f22,"%d\n",min); if(pos==0) fscanf(f1,"%d",&arr[0]); else if(pos==1) fscanf(f2,"%d",&arr[1]); } fclose(f22); } }
/**
 * Sorts v in place by delegating to quicksort() with the bounds 0 and
 * v.size().
 * NOTE(review): the upper bound passed is v.size(), not v.size()-1, so
 * quicksort() presumably treats it as exclusive - confirm against its
 * definition.
 */
void sort (array_t &v) { quicksort(v, 0, v.size()); }
/**
 * Two-argument convenience overload: forwards to the three-argument
 * quicksort() with the bounds 0 and size.
 * NOTE(review): the upper bound passed is size, not size-1, so the
 * three-argument overload presumably treats it as exclusive - confirm.
 */
void quicksort(int array[], size_t size) { quicksort(array, 0, size); }
/* Main method: -generate random list -time sequential quicksort -time parallel quicksort -time standard qsort */ int main(int argc, char *argv[]) { struct timeval start, end; double diff; srand(time(NULL)); //seed random int NUM = DNUM; int THREAD_LEVEL = T_LEVEL; if (argc == 2) //user specified list size. { NUM = atoi(argv[1]); } else if (argc == 3) { /* code */ NUM = atoi(argv[1]); THREAD_LEVEL = atoi(argv[2]); } //Want to compare sorting on the same list, //so backup. double *lystbck = (double *) malloc(NUM * sizeof(double)); double *lyst = (double *) malloc(NUM * sizeof(double)); //Populate random original/backup list. for (int i = 0; i < NUM; i++) { lystbck[i] = 1.0 * rand() / RAND_MAX; } //copy list. memcpy(lyst, lystbck, NUM * sizeof(double)); //Sequential mergesort, and timing. gettimeofday(&start, NULL); quicksort(lyst, NUM); gettimeofday(&end, NULL); if (!isSorted(lyst, NUM)) { printf("Oops, lyst did not get sorted by quicksort.\n"); } //Compute time difference. diff = ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000.0; printf("Sequential quicksort took: %lf sec.\n", diff); //Now, parallel quicksort. //copy list. memcpy(lyst, lystbck, NUM * sizeof(double)); gettimeofday(&start, NULL); parallelQuicksort(lyst, NUM, THREAD_LEVEL); gettimeofday(&end, NULL); if (!isSorted(lyst, NUM)) { printf("Oops, lyst did not get sorted by parallelQuicksort.\n"); } //Compute time difference. diff = ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000.0; printf("Parallel quicksort took: %lf sec.\n", diff); //Finally, built-in for reference: memcpy(lyst, lystbck, NUM * sizeof(double)); gettimeofday(&start, NULL); qsort(lyst, NUM, sizeof(double), compare_doubles); gettimeofday(&end, NULL); if (!isSorted(lyst, NUM)) { printf("Oops, lyst did not get sorted by qsort.\n"); } //Compute time difference. 
diff = ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000.0; printf("Built-in quicksort took: %lf sec.\n", diff); free(lyst); free(lystbck); pthread_exit(NULL); }
/*
 * gmres_dr - restarted GMRES with deflation.  After one ordinary GMRES cycle
 * of m steps, every restart computes the harmonic Ritz pairs of the projected
 * matrix, keeps nr_ev of the corresponding vectors (ordered by quicksort on
 * the squared norm of the eigenvalues) and extends them back to m vectors.
 *
 * Parameters:
 *   P             in: initial guess; out: final iterate (x0 is copied to P on
 *                 every return path)
 *   Q             right-hand side
 *   m             number of Arnoldi steps per cycle
 *   nr_ev         number of eigenvectors carried over a restart
 *   max_restarts  bound of the restart loop (restart runs 1 .. max_restarts-1)
 *   eps_sq        squared tolerance; eps = sqrt(eps_sq) is what gets compared
 *   rel_prec      first cycle: 0 tests the residual estimate against eps,
 *                 1 tests it against eps * ||Q||
 *   N             vector length passed to the linear-algebra helpers;
 *                 N == VOLUME selects solver fields of size VOLUMEPLUSRAND,
 *                 anything else VOLUMEPLUSRAND/2
 *   f             operator application, called as f(out, in)
 *
 * Returns the iteration count (restart*m, plus j inside the first cycle) when
 * the stopping test succeeds, or -1 when the restart loop is exhausted.
 *
 * Works on file-level arrays (alpha, c, s, H, G, work, work2, V, Z, idx,
 * evalues, sortarray) - presumably set up by init_gmres_dr(); confirm.
 *
 * NOTE(review): lswork is malloc'ed on the first restart and is not freed on
 * any return path visible here.
 * NOTE(review): the stopping test inside the restart loop is err < eps with
 * no rel_prec scaling - confirm this is intended.
 */
int gmres_dr(spinor * const P,spinor * const Q, const int m, const int nr_ev, const int max_restarts, const double eps_sq, const int rel_prec, const int N, matrix_mult f){ int restart=0, i, j, k, l; double beta, eps, norm, beta2=0.; complex *lswork = NULL; int lwork; complex tmp1, tmp2; int info=0; int _m = m, mp1 = m+1, np1 = nr_ev+1, ne = nr_ev, V2 = 12*(VOLUMEPLUSRAND)/2, _N = 12*N; spinor ** solver_field = NULL; const int nr_sf = 3; if(N == VOLUME) { init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf); } else { init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf); } double err=0.; spinor * r0, * x0; cmone.re = -1.; cmone.im=0.; cpone.re = 1.; cpone.im=0.; czero.re = 0.; czero.im = 0.; r0 = solver_field[0]; x0 = solver_field[2]; eps=sqrt(eps_sq); init_gmres_dr(m, (VOLUMEPLUSRAND)); norm = sqrt(square_norm(Q, N, 1)); assign(x0, P, N); /* first normal GMRES cycle */ /* r_0=Q-AP (b=Q, x+0=P) */ f(r0, x0); diff(r0, Q, r0, N); /* v_0=r_0/||r_0|| */ alpha[0].re=sqrt(square_norm(r0, N, 1)); err = alpha[0].re; if(g_proc_id == g_stdio_proc && g_debug_level > 0){ printf("%d\t%e true residue\n", restart*m, alpha[0].re*alpha[0].re); fflush(stdout); } if(alpha[0].re==0.){ assign(P, x0, N); finalize_solver(solver_field, nr_sf); return(restart*m); } mul_r(V[0], 1./alpha[0].re, r0, N); for(j = 0; j < m; j++){ /* solver_field[0]=A*v_j */ /* Set h_ij and omega_j */ /* solver_field[1] <- omega_j */ f(solver_field[1], V[j]); /* assign(solver_field[1], solver_field[0], N); */ for(i = 0; i <= j; i++){ H[i][j] = scalar_prod(V[i], solver_field[1], N, 1); /* G, work and work2 are in Fortran storage: columns first */ G[j][i] = H[i][j]; work2[j][i] = H[i][j]; work[i][j].re = H[i][j].re; work[i][j].im = -H[i][j].im; assign_diff_mul(solver_field[1], V[i], H[i][j], N); } _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, 1)), 0.); G[j][j+1] = H[j+1][j]; work2[j][j+1] = H[j+1][j]; work[j+1][j].re = H[j+1][j].re; work[j+1][j].im = -H[j+1][j].im; beta2 = 
H[j+1][j].re*H[j+1][j].re; for(i = 0; i < j; i++){ tmp1 = H[i][j]; tmp2 = H[i+1][j]; _mult_real(H[i][j], tmp2, s[i]); _add_assign_complex_conj(H[i][j], c[i], tmp1); _mult_real(H[i+1][j], tmp1, s[i]); _diff_assign_complex(H[i+1][j], c[i], tmp2); } /* Set beta, s, c, alpha[j],[j+1] */ beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j])); s[j] = H[j+1][j].re / beta; _mult_real(c[j], H[j][j], 1./beta); _complex_set(H[j][j], beta, 0.); _mult_real(alpha[j+1], alpha[j], s[j]); tmp1 = alpha[j]; _mult_assign_complex_conj(alpha[j], c[j], tmp1); /* precision reached? */ if(g_proc_id == g_stdio_proc && g_debug_level > 0){ printf("%d\t%e residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re); fflush(stdout); } if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){ _mult_real(alpha[j], alpha[j], 1./H[j][j].re); assign_add_mul(x0, V[j], alpha[j], N); for(i = j-1; i >= 0; i--){ for(k = i+1; k <= j; k++){ _mult_assign_complex(tmp1, H[i][k], alpha[k]); /* alpha[i] -= tmp1 */ _diff_complex(alpha[i], tmp1); } _mult_real(alpha[i], alpha[i], 1./H[i][i].re); assign_add_mul(x0, V[i], alpha[i], N); } for(i = 0; i < m; i++){ alpha[i].im = 0.; } assign(P, x0, N); finalize_solver(solver_field, nr_sf); return(restart*m+j); } /* if not */ else { mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N); } } j=m-1; /* prepare for restart */ _mult_real(alpha[j], alpha[j], 1./H[j][j].re); assign_add_mul(x0, V[j], alpha[j], N); if(g_proc_id == 0 && g_debug_level > 3) { printf("alpha: %e %e\n", alpha[j].re, alpha[j].im); } for(i = j-1; i >= 0; i--){ for(k = i+1; k <= j; k++){ _mult_assign_complex(tmp1, H[i][k], alpha[k]); _diff_complex(alpha[i], tmp1); } _mult_real(alpha[i], alpha[i], 1./H[i][i].re); if(g_proc_id == 0 && g_debug_level > 3) { printf("alpha: %e %e\n", alpha[i].re, alpha[i].im); } assign_add_mul(x0, V[i], alpha[i], N); } /* This produces c=V_m+1*r0 */ for(i = 0; i < mp1; i++) { c[i] = scalar_prod(V[i], r0, N, 1); 
if(g_proc_id == 0 && g_debug_level > 3) { printf("c: %e %e err = %e\n", c[i].re, c[i].im, err); } } for(restart = 1; restart < max_restarts; restart++) { /* compute c-\bar H \alpha */ _FT(zgemv) ("N", &mp1, &_m, &cmone, G[0], &mp1, alpha, &one, &cpone, c, &one, 1); err = sqrt(short_scalar_prod(c, c, mp1).re); if(g_proc_id == 0 && g_debug_level > 0) { printf("%d\t %e short residue\n", m*restart, err*err); } /* Compute new residual r0 */ /* r_0=Q-AP (b=Q, x+0=P) */ if(g_debug_level > 0) { f(r0, x0); diff(r0, Q, r0, N); tmp1.im=sqrt(square_norm(r0, N, 1)); if(g_proc_id == g_stdio_proc){ printf("%d\t%e true residue\n", m*restart, tmp1.im*tmp1.im); fflush(stdout); } } mul(r0, c[0], V[0], N); for(i = 1; i < mp1; i++) { assign_add_mul(r0, V[i], c[i], N); } if(g_debug_level > 3) { tmp1.im=sqrt(square_norm(r0, N, 1)); if(g_proc_id == g_stdio_proc){ printf("%d\t%e residue\n", m*restart, tmp1.im*tmp1.im); fflush(stdout); } } /* Stop if satisfied */ if(err < eps){ assign(P, x0, N); finalize_solver(solver_field, nr_sf); return(restart*m); } /* Prepare to compute harmonic Ritz pairs */ for(i = 0; i < m-1; i++){ alpha[i].re = 0.; alpha[i].im = 0.; } alpha[m-1].re = 1.; alpha[m-1].im = 0.; _FT(zgesv) (&_m, &one, work[0], &mp1, idx, alpha, &_m, &info); for(i = 0; i < m; i++) { G[m-1][i].re += (beta2*alpha[idx[i]-1].re); G[m-1][i].im += (beta2*alpha[idx[i]-1].im); } if(g_proc_id == 0 && g_debug_level > 3){ printf("zgesv returned info = %d, c[m-1]= %e, %e , idx[m-1]=%d\n", info, alpha[idx[m-1]-1].re, alpha[idx[m-1]-1].im, idx[m-1]); } /* c - \bar H * d -> c */ /* G contains H + \beta^2 H^-He_n e_n^H */ /* Compute harmonic Ritz pairs */ diagonalise_general_matrix(m, G[0], mp1, alpha, evalues); for(i = 0; i < m; i++) { sortarray[i] = _complex_square_norm(evalues[i]); idx[i] = i; } quicksort(m, sortarray, idx); if(g_proc_id == g_stdio_proc && g_debug_level > 1) { for(i = 0; i < m; i++) { printf("# Evalues %d %e %e \n", i, evalues[idx[i]].re, evalues[idx[i]].im); } fflush(stdout); } /* 
Copy the first nr_ev eigenvectors to work */ for(i = 0; i < ne; i++) { for(l = 0; l < m; l++) { work[i][l] = G[idx[i]][l]; } } /* Orthonormalize them */ for(i = 0; i < ne; i++) { work[i][m].re = 0.; work[i][m].im = 0.; short_ModifiedGS(work[i], m, i, work[0], mp1); } /* Orthonormalize c - \bar H d to work */ short_ModifiedGS(c, m+1, ne, work[0], mp1); for(i = 0; i < mp1; i++) { work[nr_ev][i] = c[i]; } /* Now compute \bar H = P^T_k+1 \bar H_m P_k */ for(i = 0; i < mp1; i++) { for(l = 0; l < mp1; l++) { H[i][l].re = 0.; H[i][l].im = 0.; } } _FT(zgemm) ("N", "N", &mp1, &ne, &_m, &cpone, work2[0], &mp1, work[0], &mp1, &czero, G[0], &mp1, 1, 1); _FT(zgemm) ("C", "N", &np1, &ne , &mp1, &cpone, work[0], &mp1, G[0], &mp1, &czero, H[0], &mp1, 1, 1); if(g_debug_level > 3) { for(i = 0; i < ne+1; i++) { for(l = 0; l < ne+1; l++) { if(g_proc_id == 0) { printf("(g[%d], g[%d]) = %e, %e\n", i, l, short_scalar_prod(work[i], work[l], m+1).re, short_scalar_prod(work[i], work[l], m+1).im); printf("(g[%d], g[%d]) = %e, %e\n", l, i, short_scalar_prod(work[l], work[i], m+1).re, short_scalar_prod(work[l], work[i], m+1).im); } } } } /* V_k+1 = V_m+1 P_k+1 */ /* _FT(zgemm) ("N", "N", &_N, &np1, &mp1, &cpone, (complex*)V[0], &V2, work[0], &mp1, &czero, (complex*)Z[0], &V2, 1, 1); */ for(l = 0; l < np1; l++) { mul(Z[l], work[l][0], V[0], N); for(i = 1; i < mp1; i++) { assign_add_mul(Z[l], V[i], work[l][i], N); } } /* copy back to V */ for(i = 0; i < np1; i++) { assign(V[i], Z[i], N); } /* Reorthogonalise v_nr_ev */ ModifiedGS((complex*)V[nr_ev], _N, nr_ev, (complex*)V[0], V2); if(g_debug_level > 3) { for(i = 0; i < np1; i++) { for(l = 0; l < np1; l++) { tmp1 = scalar_prod(V[l], V[i], N, 1); if(g_proc_id == 0) { printf("(V[%d], V[%d]) = %e %e %d %d %d %d %d %d %e %e\n", l, i, tmp1.re, tmp1.im, np1, mp1, ne, _m, _N, V2, H[l][i].re, H[l][i].im); } } } } /* Copy the content of H to work, work2 and G */ for(i=0; i < mp1; i++) { for(l = 0; l < mp1; l++) { G[i][l] = H[i][l]; work2[i][l] = H[i][l]; 
work[l][i].re = H[i][l].re; work[l][i].im = -H[i][l].im; } } for(j = ne; j < m; j++) { /* solver_field[0]=A*v_j */ f(solver_field[1], V[j]); /* Set h_ij and omega_j */ /* solver_field[1] <- omega_j */ /* assign(solver_field[1], solver_field[0], N); */ for(i = 0; i <= j; i++){ H[j][i] = scalar_prod(V[i], solver_field[1], N, 1); /* H, G, work and work2 are now all in Fortran storage: columns first */ G[j][i] = H[j][i]; work2[j][i] = H[j][i]; work[i][j].re = H[j][i].re; work[i][j].im = -H[j][i].im; assign_diff_mul(solver_field[1], V[i], H[j][i], N); } beta2 = square_norm(solver_field[1], N, 1); _complex_set(H[j][j+1], sqrt(beta2), 0.); G[j][j+1] = H[j][j+1]; work2[j][j+1] = H[j][j+1]; work[j+1][j].re = H[j][j+1].re; work[j+1][j].im = -H[j][j+1].im; mul_r(V[(j+1)], 1./H[j][j+1].re, solver_field[1], N); } /* Solve the least square problem for alpha*/ /* This produces c=V_m+1*r0 */ for(i = 0; i < mp1; i++) { c[i] = scalar_prod(V[i], r0, N, 1); alpha[i] = c[i]; if(g_proc_id == 0 && g_debug_level > 3) { printf("c: %e %e err = %e\n", c[i].re, c[i].im, err); } } if(lswork == NULL) { lwork = -1; _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, &tmp1, &lwork, &info, 1); lwork = (int)tmp1.re; lswork = (complex*)malloc(lwork*sizeof(complex)); } _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, lswork, &lwork, &info, 1); if(g_proc_id == 0 && g_debug_level > 3) { printf("zgels returned info = %d\n", info); fflush(stdout); } /* Compute the new solution vector */ for(i = 0; i < m; i++){ if(g_proc_id == 0 && g_debug_level > 3) { printf("alpha: %e %e\n", alpha[i].re, alpha[i].im); } assign_add_mul(x0, V[i], alpha[i], N); } } /* If maximal number of restart is reached */ assign(P, x0, N); finalize_solver(solver_field, nr_sf); return(-1); }
/**
 * Public entry point: passes the complete array to quicksort() as the
 * inclusive index range 0 .. aLength-1.
 *
 * @param aArray   elements to reorder in place
 * @param aLength  number of elements in aArray
 */
void QuickSort::sort(int* aArray, int aLength)
{
    int lastIndex = aLength - 1;

    quicksort(aArray, 0, lastIndex);
}
/*
 * sorter - external sort driver for the integers readable from fd.
 *
 * Phase 1 (split): for each i in 0 .. fileAmount-1, creates "file<i>", reads
 * up to allowedBuffer bytes from fd into buffer, calls quicksort() on the
 * buffer and writes the bytes that were read to the temp file.  fd is then
 * rewound with lseek().
 *
 * Phase 2 (merge): walks the temp files two at a time.  Each pair of int
 * streams is merged into a newly created "file<fileAmount>" (fileAmount is
 * incremented first); whatever is left of the longer stream is copied over in
 * allowedBuffer-sized blocks, and both inputs are closed and removed.
 *
 * NOTE(review): quicksort() always receives allowedBuffer/sizeof(int) as its
 * upper bound, independent of readsize, so a short read would sort stale
 * buffer contents in; also confirm whether that bound is meant to be
 * inclusive.
 * NOTE(review): fileAmount is both the merge loop bound and incremented
 * inside the loop - confirm the loop terminates where intended.
 * NOTE(review): results of creat/open/read/write are not checked, and
 * newfile[10] only fits "file" plus five digits.
 */
void sorter(int fd) { int i, tempfile; size_t readsize; int buffer[(int) allowedBuffer/sizeof(int)]; int flno; int file1, file2, target; size_t size1, size2; /* Amount of bytes read from file1 and file 2 */ int a, b; /* Elements from file 1 and file 2 */ for (i=0; i<fileAmount; i++) { char newfile[10]; sprintf(newfile, "file%d", i); tempfile=creat(newfile, 0666); readsize=read(fd, &buffer, allowedBuffer); quicksort(buffer, 0, (int) allowedBuffer/sizeof(int)); /*qsort(buffer, (size_t) readsize/sizeof(int), sizeof(int), comparator); */ /* In case things go south with wiki-quicksort */ write(tempfile, buffer, readsize); close(tempfile); } /* printf("Finished splitting and sorting temporary files\n"); */ lseek(fd, 0, SEEK_SET); /* printf("Amount of temp files is (%d)\n", fileAmount); */ for (flno=0; flno<fileAmount; flno+=2) /* This is a \"tail\" file. So we are taking 1st and 2nd file, then 2nd and 3rd... We don't want when we take n-1 and n fileto go out of reach */ { /* printf("Entered %d iteration\n", flno); */ int buffer[(int) allowedBuffer/sizeof(int)]; char newfile[10]; sprintf(newfile, "file%d", flno); /* printf("Old tempfile to work with is %s\n", newfile); */ file1=open(newfile, O_RDWR, 0666); sprintf(newfile, "file%d", flno+1); /* printf("Old tempfile to work with is %s\n", newfile); */ file2=open(newfile, O_RDWR, 0666); fileAmount++; sprintf(newfile, "file%d", fileAmount); /* printf("Created new tempfile to work with - %s\n", newfile); */ target=creat(newfile, 0666); size1=read(file1, &a, sizeof(int)); size2=read(file2, &b, sizeof(int)); /* printf("a=[%d], b=[%d]\n", a, b); */ while (((int) size1>0) && ((int) size2>0)) { if (a<b) { write(target, &a, sizeof(int)); size1=read(file1, &a, sizeof(int)); } else { if (a>b) { write(target, &b, sizeof(int)); size2=read(file2, &b, sizeof(int)); } else { write(target, &a, sizeof(int)); write(target, &b, sizeof(int)); size1=read(file1, &a, sizeof(int)); size2=read(file2, &b, sizeof(int)); /* printf("a=[%d], b=[%d]\n", 
a, b); */ } } } if ((int) size1>0) write(target, &a, sizeof(int)); if ((int) size2>0) write(target, &b, sizeof(int)); /* printf("Finished writing, size1 and size 2 are %d and %d\n", size1, size2); */ while ((int) size1>0) { size1=read(file1, buffer, allowedBuffer); write(target, buffer, size1); /* printf("%d\n", size1); */ /* printf("Writing 1 %d\n", flno); */ } while ((int) size2>0) { size2=read(file2, buffer, allowedBuffer); write(target, buffer, size2); /* printf("Writing 2 %d\n", flno); */ } /* while ((size2=read(file2, buffer, allowedBuffer))!=0) write(target, buffer, size2); */ close(file1); sprintf(newfile, "file%d", flno); remove(newfile); /* printf("Removed [%s]\n", newfile); */ close(file2); sprintf(newfile, "file%d", flno+1); remove(newfile); /* printf("Removed [%s]\n", newfile); */ close(target); } printf("Finished merging temporary files\n"); }
/*
 * Sorts the N integers at v into ascending order, in place.
 *
 * N  number of elements in v
 * v  array to sort
 *
 * Returns the first element of the sorted array (its minimum), or 0 when
 * there is nothing to sort (N <= 0 or v is NULL).
 *
 * The array is split around the value of its middle element into three
 * regions - less than, equal to and greater than the pivot - and the two
 * outer regions are sorted recursively.  The split is done by swapping inside
 * v itself.  The earlier version copied the regions into three malloc'ed
 * buffers per call that were never freed, and it read v[0] even when N was 0.
 */
int quicksort(int N,int *v)
{
    int pivot_value;
    int lt, i, gt, tmp;

    if (v == NULL || N <= 0) {
        return 0;
    }
    if (N == 1) {
        return *v;
    }

    /* pick the middle point of the array as pivot */
    pivot_value = v[N / 2];

    /*
     * Invariant of the loop below:
     *   v[0 .. lt-1]   <  pivot
     *   v[lt .. i-1]   == pivot
     *   v[i .. gt]     not examined yet
     *   v[gt+1 .. N-1] >  pivot
     */
    lt = 0;
    i = 0;
    gt = N - 1;
    while (i <= gt) {
        if (v[i] < pivot_value) {
            tmp = v[lt];
            v[lt] = v[i];
            v[i] = tmp;
            lt++;
            i++;
        } else if (v[i] > pivot_value) {
            /* the element swapped in from the right is still unexamined */
            tmp = v[gt];
            v[gt] = v[i];
            v[i] = tmp;
            gt--;
        } else {
            i++;
        }
    }

    /* sort the "less" part */
    quicksort(lt, v);
    /* sort the "greater" part; the pivot region is never empty, so both
     * recursive calls work on strictly fewer than N elements */
    quicksort(N - 1 - gt, v + gt + 1);

    return *v;
}