/*
 * One recursive multigrid cycle for variable `var`, starting at `level`.
 *
 * sel     element list handle; passed through to every helper
 * f       right-hand side at this level
 * x       solution estimate at this level, corrected in place
 * var     variable index forwarded to the operator/solver helpers
 * level   current multigrid level (0 is the entry level)
 * eps     tolerance forwarded to conjugate_gradient
 * opeval  operator-evaluation callback forwarded to the helpers
 *
 * On the last level (nlevels - 1) the problem is handed directly to
 * conjugate_gradient with an iteration argument of 500.  Otherwise the
 * residual is restricted to level + 1, the routine recurses, the result is
 * prolonged back and added to x, and conjugate_gradient is run again with an
 * iteration argument of 15.  Returns whatever conjugate_gradient returns.
 */
static double multigrid_recurse(struct element **sel, double ****f,
                                double ****x, int var, int level, double eps,
                                void (*opeval) ())
{
    const int last_level = (*sel)->precon->nlevels - 1;

    if (level == last_level) {
        return conjugate_gradient(sel, f, x, var, opeval,
                                  jacobi_preconditioner, eps, 500);
    }

    const int smooth_iters = 15;
    const int fine_n = (*sel)->basis->n;
    const int coarse_n = (*sel)->precon->nmg[level + 1];

    double ****residual = new_4d_array(fine_n, fine_n, fine_n, (*sel)->nel);
    double ****coarse_rhs = new_4d_array(coarse_n, coarse_n, coarse_n,
                                         (*sel)->nel);
    double ****coarse_sol = new_4d_array(coarse_n, coarse_n, coarse_n,
                                         (*sel)->nel);

    /* Below the entry level, f is used as the residual directly. */
    if (level != 0) {
        copy_array(***f, ***residual, fine_n * fine_n * fine_n * (*sel)->nel);
    } else {
        compute_residual(sel, x, f, residual, var, opeval);
    }

    /* Descend: restrict, recurse on level + 1 (its return value is unused). */
    switch_multigrid_level(sel, level + 1);
    multigrid_restriction(sel, residual, coarse_rhs, level + 1);
    multigrid_recurse(sel, coarse_rhs, coarse_sol, var, level + 1, eps, opeval);

    /* Ascend: prolong into `residual` (reused as scratch) and correct x. */
    switch_multigrid_level(sel, level);
    multigrid_prolongation(sel, coarse_sol, residual, level);
    add_array(***residual, ***x, fine_n * fine_n * fine_n * (*sel)->nel, 1.0);

    const double err = conjugate_gradient(sel, f, x, var, opeval,
                                          jacobi_preconditioner, eps,
                                          smooth_iters);

    free_4d_array(residual);
    free_4d_array(coarse_rhs);
    free_4d_array(coarse_sol);

    return err;
}
// Single-MultiFab convenience overload: wraps each argument in a
// PArray<MultiFab> via Copy() and forwards to the PArray overload of
// compute_residual.
void
FMultiGrid::compute_residual (MultiFab & phi,
                              MultiFab & rhs,
                              MultiFab & res)
{
    PArray<MultiFab> phi_arr;
    Copy(phi_arr, phi);

    PArray<MultiFab> rhs_arr;
    Copy(rhs_arr, rhs);

    PArray<MultiFab> res_arr;
    Copy(res_arr, res);

    compute_residual(phi_arr, rhs_arr, res_arr);
}
/*
 * Inner-outer Gauss-Seidel PageRank solver working on caller-supplied vectors.
 *
 * Phase 1 (inner/outer): while the outer residual exceeds `tol`, build the
 * inner right-hand side with compute_f() and run Gauss-Seidel sweeps with
 * damping `beta` until the inner change drops to `itol`.  The phase ends when
 * an outer step needed fewer than two inner sweeps or the outer residual is
 * already below `itol`.
 * Phase 2 (plain Gauss-Seidel): sweep with damping `alpha` until the residual
 * is at most `tol` or the multiplication budget `maxit` is used up.
 *
 * g       graph; the vector length is size(g)
 * alpha   PageRank damping used for the outer residual and phase 2
 * tol     convergence tolerance on the (outer) residual
 * maxit   cap on sweeps/multiplications
 * v, x, y, f  caller-owned work vectors of length size(g); x is the iterate
 *         (updated in place), f receives the inner right-hand side, y is
 *         handed to dangling_mult() and returned
 * beta    damping used for the inner sweeps
 * itol    tolerance for the inner iteration
 * resid   true: recompute the residual after every phase-2 sweep;
 *         false: only when the sweep difference is below `tol` and the
 *         estimated check iteration `nresit` has been passed
 * normed  true: rescale x by 1/nx after every sweep
 *
 * Returns y (also when the iteration did not converge; a message is printed
 * in that case), or NULL if dangling_mult() or a sweep reports failure.
 * Progress and result lines are written to stdout.
 */
double* inner_outer_gauss_seidel_alg_vecs(
    Graph g, double alpha, double tol, int maxit,
    double *v, double *x, double *y, double *f,
    double beta, double itol, bool resid, bool normed)
{
    size_t n = (size_t)size(g);
    stime_struct start;
    simple_time_clock(&start);

    printf("inoutgs(%6.4f,%6.4f,%8e,%1i) with tol=%8e and maxit=%6i iterations\n",
           alpha, beta, itol, resid, tol, maxit);
    fflush(stdout);

    double odelta, dtx, nx, ndiff, ltol=log(tol), la=log(alpha), dt;

    if (dangling_mult(g, x, y, v, 1.0, &dtx, NULL)) {
        return (NULL);
    }
    odelta = compute_outer_residual(x, y, v, alpha, n);

    int iter = 0, rval, nresit = 0, nmult = 1, nresids = 0;

#ifdef BVALGS_VERBOSE
    printf(" iogs (outer) : iter = %6i ; odelta = %10e ; dt = %7.1f\n",
           iter, odelta, elapsed_time(&start));
#endif

    /* Phase 1: inner-outer iteration. */
    while (odelta > tol && nmult < maxit) {
        int iiter=0;
        double idelta = odelta;

        compute_f(f, y, v, alpha, beta, n);

        while (iter+iiter < maxit && idelta > itol) {
            rval = gauss_seidel_sweep(g, x, NULL, f, beta, 1.0, dtx, false,
                                      &nx, &idelta, &dtx);
            /* Treat a failed sweep the same way phase 2 does. */
            if (rval) {
                return (NULL);
            }
            nmult++;
            if (normed) {
                shift_and_scale(x,0.0,1./nx,n);
                dtx=dtx/nx;
                nx=1.0;
            }
            /* idelta is the sweep difference, not a true residual. */
            iiter++;
        }

        if (dangling_mult(g, x, y, v, 1.0, &dtx, &dtx)) {
            return (NULL);
        }
        iter++;
        nmult++;
        odelta = compute_outer_residual(x,y,v,alpha,n);

#ifdef BVALGS_VERBOSE
        printf(" iogs (outer) : iter = %6i ; odelta = %10e ; dt = %7.1f ; nmult = %9i\n",
               iter, odelta, elapsed_time(&start), nmult);
#endif

        if (iiter < 2 || odelta < itol) {
            break;
        }
    }

    dtx = sum_dtx(g,x);
    dt = elapsed_time(&start);
    simple_time_clock(&start);

    /* Phase 2: plain Gauss-Seidel; residual evaluations are not counted
       against the sweep budget. */
    while (odelta > tol && nmult-nresids < maxit) {
        rval = gauss_seidel_sweep(g, x, NULL, v, alpha, (1.0-alpha), dtx, false,
                                  &nx, &ndiff, &dtx);
        nmult++;
        iter++;
        if (normed) {
            shift_and_scale(x,0.0,1./nx,n);
            dtx=dtx/nx;
            nx=1.0;
        }
        if (rval) {
            return (NULL);
        }
        dt += elapsed_time(&start);

        /* compute the residual */
        if (resid) {
            odelta = compute_residual(g,x,y,v,alpha,dtx,nx);
            nmult++;
            nresids++;
            simple_time_clock(&start);
        } else {
            simple_time_clock(&start);
            if (ndiff < tol && iter>nresit) {
                odelta = compute_residual(g,x,y,v,alpha,dtx,nx);
                nmult++;
                nresids++;
                /* Estimate the next iteration worth checking.  Skipped once
                   converged (the loop exits anyway) and for a zero residual,
                   where log() is not finite and the int cast is undefined. */
                if (odelta > tol && odelta > 0.0) {
                    nresit=iter+(int)((ltol - log(odelta))/(2.0*la));
                }
            }
        }

#ifdef BVALGS_VERBOSE
        printf(" iogs ( gs) : iter = %6i ; delta = %10e ; diff = %10e ; dt = %7.1f sec ; nmult = %6i\n",
               iter, odelta, ndiff, dt, nmult );
#endif
    }

    if (odelta > tol) {
        printf("iogs(%6.4f) did not converge to %8e in %6i sweeps\n",
               alpha, tol, maxit);
        fflush(stdout);
    } else {
        printf("iogs : solved pagerank(a=%6.4f) in %5i its, %5i sweeps, and %5i mults to %8e tol\n",
               alpha, iter, nmult-nresids, nmult, tol);
        fflush(stdout);
    }

    return y;
}
/* ATS_SOUND *tracker (ANARGS *anargs, char *soundfile, char *resfile)
 * partial tracking function
 * anargs: pointer to analysis parameters; out-of-range values are replaced
 *         (with a warning on stderr) and derived values (srate, first_smp,
 *         total_samps, cycle_smp, win_size, hop_smp, frames, fft_size,
 *         fft_mag, lowest_bin, highest_bin) are written back into it
 * soundfile: path to input file (must be mono)
 * resfile: path handed to compute_residual(); the same path is then passed
 *          to residual_analysis() (ATSA_RES_FILE is used if it is NULL).
 *          Only used when anargs->type is 3 or 4
 * returns an ATS_SOUND with data issued from analysis, or NULL when the
 * file cannot be opened, is not mono, or yields too few frames
 *
 * NOTE(review): the descriptor returned by mus_sound_open_input() is not
 * closed anywhere in this function, and the malloc() results are not
 * checked -- confirm whether that is intentional.
 */
ATS_SOUND *tracker (ANARGS *anargs, char *soundfile, char *resfile)
{
  int fd, M_2, first_point, filptr, n_partials = 0;
  int frame_n, k, sflen, *win_samps, peaks_size, tracks_size = 0;
  int i, frame, i_tmp;
  float *window, norm, sfdur, f_tmp;
  /* declare structures and buffers */
  ATS_SOUND *sound = NULL;
  ATS_PEAK *peaks, *tracks = NULL, cpy_peak;
  ATS_FRAME *ana_frames = NULL, *unmatched_peaks = NULL;
  mus_sample_t **bufs;
  ATS_FFT fft;
#ifdef FFTW
  fftw_plan plan;
  FILE *fftw_wisdom_file;
#endif

  /* open input file
     we get srate and total_samps in file in anargs */
  if ((fd = mus_sound_open_input(soundfile))== -1) {
    fprintf(stderr, "%s: %s\n", soundfile, strerror(errno));
    return(NULL);
  }
  /* reject multi-channel sound files */
  if (mus_sound_chans(soundfile) > 1) {
    fprintf(stderr, "Error: file has %d channels, must be mono!\n", mus_sound_chans(soundfile));
    return(NULL);
  }

  fprintf(stderr, "tracking...\n");

  /* get sample rate and # of frames from file header */
  anargs->srate = mus_sound_srate(soundfile);
  sflen = mus_sound_frames(soundfile);
  sfdur = (float)sflen/anargs->srate;

  /* check analysis parameters */
  /* check start time */
  if( !(anargs->start >= 0.0 && anargs->start < sfdur) ){
    fprintf(stderr, "Warning: start %f out of bounds, corrected to 0.0\n", anargs->start);
    anargs->start = (float)0.0;
  }
  /* check duration */
  if(anargs->duration == ATSA_DUR) {
    anargs->duration = sfdur - anargs->start;
  }
  f_tmp = anargs->duration + anargs->start;
  if( !(anargs->duration > 0.0 && f_tmp <= sfdur) ){
    fprintf(stderr, "Warning: duration %f out of bounds, limited to file duration\n", anargs->duration);
    anargs->duration = sfdur - anargs->start;
  }
  /* print time bounds */
  fprintf(stderr, "start: %f duration: %f file dur: %f\n", anargs->start, anargs->duration , sfdur);
  /* check lowest frequency */
  if( !(anargs->lowest_freq > 0.0 && anargs->lowest_freq < anargs->highest_freq)){
    fprintf(stderr, "Warning: lowest freq. %f out of bounds, forced to default: %f\n", anargs->lowest_freq, ATSA_LFREQ);
    anargs->lowest_freq = ATSA_LFREQ;
  }
  /* check highest frequency */
  if( !(anargs->highest_freq > anargs->lowest_freq && anargs->highest_freq <= anargs->srate * 0.5 )){
    fprintf(stderr, "Warning: highest freq. %f out of bounds, forced to default: %f\n", anargs->highest_freq, ATSA_HFREQ);
    anargs->highest_freq = ATSA_HFREQ;
  }
  /* frequency deviation */
  /* NOTE(review): the test excludes 1.0 while the message says "<= 1.0". */
  if( !(anargs->freq_dev > 0.0 && anargs->freq_dev < 1.0) ){
    fprintf(stderr, "Warning: freq. dev. %f out of bounds, should be > 0.0 and <= 1.0, forced to default: %f\n", anargs->freq_dev, ATSA_FREQDEV);
    anargs->freq_dev = ATSA_FREQDEV;
  }
  /* window cycles */
  if( !(anargs->win_cycles >= 1 && anargs->win_cycles <= 8) ){
    fprintf(stderr, "Warning: windows cycles %d out of bounds, should be between 1 and 8, forced to default: %d\n", anargs->win_cycles, ATSA_WCYCLES);
    anargs->win_cycles = ATSA_WCYCLES;
  }
  /* window type */
  if( !(anargs->win_type >= 0 && anargs->win_type <= 3) ){
    fprintf(stderr, "Warning: window type %d out of bounds, should be between 0 and 3, forced to default: %d\n", anargs->win_type, ATSA_WTYPE);
    anargs->win_type = ATSA_WTYPE;
  }
  /* hop size */
  if( !(anargs->hop_size > 0.0 && anargs->hop_size <= 1.0) ){
    fprintf(stderr, "Warning: hop size %f out of bounds, should be > 0.0 and <= 1.0, forced to default: %f\n", anargs->hop_size, ATSA_HSIZE);
    anargs->hop_size = ATSA_HSIZE;
  }
  /* lowest mag */
  /* NOTE(review): the test accepts values <= 0.0 only, which does not match
     the range quoted in the message. */
  if( !(anargs->lowest_mag <= 0.0) ){
    fprintf(stderr, "Warning: lowest magnitude %f out of bounds, should be >= 0.0 and <= 1.0, forced to default: %f\n", anargs->lowest_mag, ATSA_LMAG);
    anargs->lowest_mag = ATSA_LMAG;
  }

  /* set some values before checking next set of parameters */
  anargs->first_smp = (int)floor(anargs->start * (float)anargs->srate);
  anargs->total_samps = (int)floor(anargs->duration * (float)anargs->srate);
  /* fundamental cycles */
  anargs->cycle_smp = (int)floor((double)anargs->win_cycles * (double)anargs->srate / (double)anargs->lowest_freq);
  /* window size (forced to be odd) */
  anargs->win_size = (anargs->cycle_smp % 2 == 0) ? anargs->cycle_smp+1 : anargs->cycle_smp;
  /* calculate hop samples */
  anargs->hop_smp = floor( (float)anargs->win_size * anargs->hop_size );
  /* compute total number of frames */
  anargs->frames = compute_frames(anargs);
  /* check that we have enough frames for the analysis */
  if( !(anargs->frames >= ATSA_MFRAMES) ){
    fprintf(stderr, "Error: %d frames are not enough for analysis, nead at least %d\n", anargs->frames , ATSA_MFRAMES);
    return(NULL);
  }

  /* check other user parameters */
  /* track length */
  if( !(anargs->track_len >= 1 && anargs->track_len < anargs->frames) ){
    i_tmp = (ATSA_TRKLEN < anargs->frames) ? ATSA_TRKLEN : anargs->frames-1;
    fprintf(stderr, "Warning: track length %d out of bounds, forced to: %d\n", anargs->track_len , i_tmp);
    anargs->track_len = i_tmp;
  }
  /* min. segment length */
  if( !(anargs->min_seg_len >= 1 && anargs->min_seg_len < anargs->frames) ){
    i_tmp = (ATSA_MSEGLEN < anargs->frames) ? ATSA_MSEGLEN : anargs->frames-1;
    fprintf(stderr, "Warning: min. segment length %d out of bounds, forced to: %d\n", anargs->min_seg_len, i_tmp);
    anargs->min_seg_len = i_tmp;
  }
  /* min. gap length */
  if( !(anargs->min_gap_len >= 0 && anargs->min_gap_len < anargs->frames) ){
    i_tmp = (ATSA_MGAPLEN < anargs->frames) ? ATSA_MGAPLEN : anargs->frames-1;
    fprintf(stderr, "Warning: min. gap length %d out of bounds, forced to: %d\n", anargs->min_gap_len, i_tmp);
    anargs->min_gap_len = i_tmp;
  }
  /* SMR threshold */
  if( !(anargs->SMR_thres >= 0.0 && anargs->SMR_thres < ATSA_MAX_DB_SPL) ){
    fprintf(stderr, "Warning: SMR threshold %f out of bounds, shoul be >= 0.0 and < %f dB SPL, forced to default: %f\n", anargs->SMR_thres, ATSA_MAX_DB_SPL, ATSA_SMRTHRES);
    anargs->SMR_thres = ATSA_SMRTHRES;
  }
  /* min. seg. SMR */
  if( !(anargs->min_seg_SMR >= anargs->SMR_thres && anargs->min_seg_SMR < ATSA_MAX_DB_SPL) ){
    fprintf(stderr, "Warning: min. seg. SMR %f out of bounds, shoul be >= %f and < %f dB SPL, forced to default: %f\n", anargs->min_seg_SMR, anargs->SMR_thres, ATSA_MAX_DB_SPL, ATSA_MSEGSMR);
    anargs->min_seg_SMR = ATSA_MSEGSMR;
  }
  /* last peak contibution */
  if( !(anargs->last_peak_cont >= 0.0 && anargs->last_peak_cont <= 1.0) ){
    fprintf(stderr, "Warning: last peak contibution %f out of bounds, should be >= 0.0 and <= 1.0, forced to default: %f\n", anargs->last_peak_cont, ATSA_LPKCONT);
    anargs->last_peak_cont = ATSA_LPKCONT;
  }
  /* SMR cont. */
  if( !(anargs->SMR_cont >= 0.0 && anargs->SMR_cont <= 1.0) ){
    fprintf(stderr, "Warning: SMR contibution %f out of bounds, should be >= 0.0 and <= 1.0, forced to default: %f\n", anargs->SMR_cont, ATSA_SMRCONT);
    anargs->SMR_cont = ATSA_SMRCONT;
  }

  /* continue computing parameters */
  /* fft size */
  anargs->fft_size = ppp2(2*anargs->win_size);

  /* allocate memory for sound, we read the whole sound in memory */
  bufs = (mus_sample_t **)malloc(sizeof(mus_sample_t*));
  bufs[0] = (mus_sample_t *)malloc(sflen * sizeof(mus_sample_t));

  /* make our window */
  window = make_window(anargs->win_type, anargs->win_size);
  /* get window norm */
  norm = window_norm(window, anargs->win_size);
  /* fft mag for computing frequencies */
  anargs->fft_mag = (double)anargs->srate / (double)anargs->fft_size;
  /* lowest fft bin for analysis */
  anargs->lowest_bin = floor( anargs->lowest_freq / anargs->fft_mag );
  /* highest fft bin for analisis */
  anargs->highest_bin = floor( anargs->highest_freq / anargs->fft_mag );
  /* allocate an array analysis frames in memory */
  ana_frames = (ATS_FRAME *)malloc(anargs->frames * sizeof(ATS_FRAME));
  /* alocate memory to store mid-point window sample numbers */
  win_samps = (int *)malloc(anargs->frames * sizeof(int));
  /* center point of window */
  M_2 = floor((anargs->win_size - 1) / 2);
  /* first point in fft buffer to write */
  first_point = anargs->fft_size - M_2;
  /* half a window from first sample */
  filptr = anargs->first_smp - M_2;
  /* read sound into memory */
  mus_sound_read(fd, 0, sflen-1, 1, bufs);

  /* make our fft-struct */
  fft.size = anargs->fft_size;
  fft.rate = anargs->srate;
#ifdef FFTW
  fft.data = fftw_malloc(sizeof(fftw_complex) * fft.size);
  if(fftw_import_system_wisdom())
    fprintf(stderr, "system wisdom loaded!\n");
  else
    fprintf(stderr, "cannot locate system wisdom!\n");
  if((fftw_wisdom_file = fopen("ats-wisdom", "r")) != NULL) {
    fftw_import_wisdom_from_file(fftw_wisdom_file);
    fprintf(stderr, "ats-wisdom loaded!\n");
    fclose(fftw_wisdom_file);
  } else
    fprintf(stderr, "cannot locate ats-wisdom!\n");
  plan = fftw_plan_dft_1d(fft.size, fft.data, fft.data, FFTW_FORWARD, FFTW_PATIENT);
#else
  fft.fdr = (double *)malloc(anargs->fft_size * sizeof(double));
  fft.fdi = (double *)malloc(anargs->fft_size * sizeof(double));
#endif

  /* main loop */
  for (frame_n=0; frame_n<anargs->frames; frame_n++) {
    /* clear fft arrays */
#ifdef FFTW
    for(k=0; k<fft.size; k++)
      fft.data[k][0] = fft.data[k][1] = 0.0f;
#else
    for(k=0; k<fft.size; k++)
      fft.fdr[k] = fft.fdi[k] = 0.0f;
#endif
    /* multiply by window; samples outside the file are left at zero */
    for (k=0; k<anargs->win_size; k++) {
      if ((filptr >= 0) && (filptr < sflen)) {
#ifdef FFTW
        fft.data[(k+first_point)%fft.size][0] = window[k] * MUS_SAMPLE_TO_FLOAT(bufs[0][filptr]);
#else
        fft.fdr[(k+first_point)%anargs->fft_size] = window[k] * MUS_SAMPLE_TO_FLOAT(bufs[0][filptr]);
#endif
      }
      filptr++;
    }
    /* we keep sample numbers of window midpoints in win_samps array */
    win_samps[frame_n] = filptr - M_2 - 1;
    /* move file pointer back */
    filptr = filptr - anargs->win_size + anargs->hop_smp;
    /* take the fft */
#ifdef FFTW
    fftw_execute(plan);
#else
    fft_slow(fft.fdr, fft.fdi, fft.size, 1);
#endif
    /* peak detection */
    peaks_size = 0;
    peaks = peak_detection(&fft, anargs->lowest_bin, anargs->highest_bin, anargs->lowest_mag, norm, &peaks_size);
    /* peak tracking */
    if (peaks != NULL) {
      /* evaluate peaks SMR (masking curves) */
      evaluate_smr(peaks, peaks_size);
      if (frame_n) {
        /* initialize or update tracks */
        if ((tracks = update_tracks(tracks, &tracks_size, anargs->track_len, frame_n, ana_frames, anargs->last_peak_cont)) != NULL) {
          /* do peak matching */
          unmatched_peaks = peak_tracking(tracks, &tracks_size, peaks, &peaks_size, anargs->freq_dev, 2.0 * anargs->SMR_cont, &n_partials);
          if (unmatched_peaks != NULL) {
            /* kill unmatched peaks from previous frame */
            if(unmatched_peaks[0].peaks != NULL) {
              for(k=0; k<unmatched_peaks[0].n_peaks; k++) {
                cpy_peak = unmatched_peaks[0].peaks[k];
                cpy_peak.amp = cpy_peak.smr = 0.0;
                peaks = push_peak(&cpy_peak, peaks, &peaks_size);
              }
              free(unmatched_peaks[0].peaks);
            }
            /* give birth to peaks from new frame */
            if(unmatched_peaks[1].peaks != NULL) {
              for(k=0; k<unmatched_peaks[1].n_peaks; k++) {
                tracks = push_peak(&unmatched_peaks[1].peaks[k], tracks, &tracks_size);
                unmatched_peaks[1].peaks[k].amp = unmatched_peaks[1].peaks[k].smr = 0.0;
                ana_frames[frame_n-1].peaks = push_peak(&unmatched_peaks[1].peaks[k], ana_frames[frame_n-1].peaks, &ana_frames[frame_n-1].n_peaks);
              }
              free(unmatched_peaks[1].peaks);
            }
          }
        } else {
          /* give number to all peaks */
          qsort(peaks, peaks_size, sizeof(ATS_PEAK), peak_frq_inc);
          for(k=0; k<peaks_size; k++)
            peaks[k].track = n_partials++;
        }
      } else {
        /* give number to all peaks */
        qsort(peaks, peaks_size, sizeof(ATS_PEAK), peak_frq_inc);
        for(k=0; k<peaks_size; k++)
          peaks[k].track = n_partials++;
      }
      /* attach peaks to ana_frames */
      ana_frames[frame_n].peaks = peaks;
      ana_frames[frame_n].n_peaks = n_partials;
      ana_frames[frame_n].time = (double)(win_samps[frame_n] - anargs->first_smp) / (double)anargs->srate;
      /* free memory; reset the pointer so that a later frame which does not
         call peak_tracking() cannot free the same block a second time */
      free(unmatched_peaks);
      unmatched_peaks = NULL;
    } else {
      /* if no peaks found, initialize empty frame */
      ana_frames[frame_n].peaks = NULL;
      ana_frames[frame_n].n_peaks = 0;
      ana_frames[frame_n].time = (double)(win_samps[frame_n] - anargs->first_smp) / (double)anargs->srate;
    }
  }

  /* free up some memory */
  free(window);
  free(tracks);
#ifdef FFTW
  fftw_destroy_plan(plan);
  fftw_free(fft.data);
#else
  free(fft.fdr);
  free(fft.fdi);
#endif

  /* init sound */
  fprintf(stderr, "Initializing ATS data...");
  sound = (ATS_SOUND *)malloc(sizeof(ATS_SOUND));
  init_sound(sound, anargs->srate, (int)(anargs->hop_size * anargs->win_size),
             anargs->win_size, anargs->frames, anargs->duration, n_partials,
             ((anargs->type == 3 || anargs->type == 4) ? 1 : 0));

  /* store values from frames into the arrays */
  for(k=0; k<n_partials; k++) {
    for(frame=0; frame<sound->frames; frame++) {
      sound->time[k][frame] = ana_frames[frame].time;
      for(i=0; i<ana_frames[frame].n_peaks; i++)
        if(ana_frames[frame].peaks[i].track == k) {
          sound->amp[k][frame] = ana_frames[frame].peaks[i].amp;
          sound->frq[k][frame] = ana_frames[frame].peaks[i].frq;
          sound->pha[k][frame] = ana_frames[frame].peaks[i].pha;
          sound->smr[k][frame] = ana_frames[frame].peaks[i].smr;
        }
    }
  }
  fprintf(stderr, "done!\n");

  /* free up ana_frames memory */
  /* first, free all peaks in each slot of ana_frames... */
  for (k=0; k<anargs->frames; k++)
    free(ana_frames[k].peaks);
  /* ...then free ana_frames */
  free(ana_frames);

  /* optimize sound */
  optimize_sound(anargs, sound);

  /* compute residual */
  if( anargs->type == 3 || anargs->type == 4 ) {
    fprintf(stderr, "Computing residual...");
    compute_residual(bufs, sflen, resfile, sound, win_samps, anargs->srate);
    fprintf(stderr, "done!\n");
  }

  /* free the rest of the memory */
  free(win_samps);
  free(bufs[0]);
  free(bufs);

  /* analyze residual: read back the file that compute_residual() was asked
     to write, falling back to the default name when none was given */
  if( anargs->type == 3 || anargs->type == 4 ) {
    fprintf(stderr, "Analyzing residual...");
    residual_analysis((resfile != NULL) ? resfile : ATSA_RES_FILE, sound);
    fprintf(stderr, "done!\n");
  }

#ifdef FFTW
  /* save accumulated wisdom; failing to open the file is not fatal */
  fftw_wisdom_file = fopen("ats-wisdom", "w");
  if (fftw_wisdom_file != NULL) {
    fftw_export_wisdom_to_file(fftw_wisdom_file);
    fclose(fftw_wisdom_file);
  } else {
    fprintf(stderr, "cannot write ats-wisdom!\n");
  }
#endif

  fprintf(stderr, "tracking completed.\n");
  return(sound);
}
int main (int argc, char **argv) { int mpi_rank, mpi_size; int n, ln; double *A, *b, *xA, *xB, *xC, *r, *X, *x; double lmax, max; struct timeval before, after; MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank); /* Get the argument that indicates the problem size */ n = JACOBI_DEF_SIZE; if (argc == 2) n = atoi (argv[1]); if (mpi_rank == ROOT_NODE) fprintf (stdout, "n = %d\n", n); ln = n / mpi_size; /* Initialize the random seed */ /*srandom((unsigned int) getpid ());*/ /* Allocate memory */ A = (double *) calloc (ln * n, sizeof(double)); if (A == NULL) { perror ("calloc"); MPI_Finalize (); return EXIT_FAILURE; } b = (double *) calloc (ln, sizeof(double)); if (b == NULL) { perror ("calloc"); MPI_Finalize (); return EXIT_FAILURE; } xA = (double *) calloc (ln, sizeof(double)); if (xA == NULL) { perror ("calloc"); MPI_Finalize (); return EXIT_FAILURE; } xB = (double *) calloc (ln, sizeof(double)); if (xB == NULL) { perror ("calloc"); MPI_Finalize (); return EXIT_FAILURE; } xC = (double *) calloc (ln, sizeof(double)); if (xC == NULL) { perror ("calloc"); MPI_Finalize (); return EXIT_FAILURE; } X = (double *) calloc (n, sizeof(double)); if (X == NULL) { perror ("calloc"); MPI_Finalize (); return EXIT_FAILURE; } r = (double *) calloc (ln, sizeof(double)); if (r == NULL) { perror ("calloc"); MPI_Finalize (); return EXIT_FAILURE; } generate_matrix (A, ln, n, mpi_rank); generate_vector (b, ln); gettimeofday(&before, NULL); x = jacobi (xA, xB, xC, A, b, ln, n, mpi_rank, mpi_size); gettimeofday(&after, NULL); MPI_Allgather (x, ln, MPI_DOUBLE, X, ln, MPI_DOUBLE, MPI_COMM_WORLD); /* Compute the residual */ compute_residual (r, A, X, b, ln, n); /* Compute the maximum absolute value of the residual */ lmax = find_max_abs (r, ln); MPI_Reduce (&lmax, &max, 1, MPI_DOUBLE, MPI_MAX, ROOT_NODE, MPI_COMM_WORLD); if (mpi_rank == ROOT_NODE) display_info (A, x, b, r, n, max, &before, &after); /* Free the memory */ free (A); free 
(b); free (xA); free (xB); free (xC); free (X); free (r); /* Return success */ MPI_Finalize (); return 0; }
int main(int argc, char* argv[]) { // Print help if necessary bool help = read_bool(argc, argv, "--help", false); if ((argc < 2) || (help)) { usage(argv); return 0; } // Use parameters struct for passing parameters to kernels efficiently parameters prm; // Parse inputs prm.matDims[0] = read_int(argc, argv, "--m", 2); prm.matDims[1] = read_int(argc, argv, "--k", 2); prm.matDims[2] = read_int(argc, argv, "--n", 2); prm.rank = read_int(argc, argv, "--rank", 7); prm.method = read_string(argc, argv, "--method", (char *)"als"); int maxIters = read_int(argc, argv, "--maxiters", 1000); int maxSecs = read_int(argc, argv, "--maxsecs", 1000); double tol = read_double(argc, argv, "--tol", 1e-8); int printItn = read_int(argc, argv, "--printitn", 0); double printTol = read_double(argc, argv, "--printtol", 1.0); int seed = read_int(argc, argv, "--seed", 0); int numSeeds = read_int(argc, argv, "--numseeds", 1); bool verbose = read_bool(argc, argv, "--verbose", false); prm.rnd_maxVal = read_double(argc,argv,"--maxval",1.0); prm.rnd_pwrOfTwo = read_int(argc,argv,"--pwrof2",0); bool roundFinal = read_bool(argc, argv, "--rndfin",false); prm.alpha = read_double(argc,argv, "--alpha", 0.1); int M = read_int(argc,argv, "--M", 0); if (M) { prm.M[0] = M; prm.M[1] = M; prm.M[2] = M; } else { prm.M[0] = read_int(argc, argv, "--M0", -1); prm.M[1] = read_int(argc, argv, "--M1", -1); prm.M[2] = read_int(argc, argv, "--M2", -1); } char * infile = read_string(argc, argv, "--input", NULL); char * outfile = read_string(argc, argv, "--output", NULL); if (verbose) { setbuf(stdout, NULL); printf("\n\n---------------------------------------------------------\n"); printf("PARAMETERS\n"); printf("dimensions = %d %d %d\n",prm.matDims[0],prm.matDims[1],prm.matDims[2]); printf("rank = %d\n",prm.rank); printf("method = %s\n",prm.method); if (infile) printf("input = %s\n",infile); else { if (numSeeds == 1) printf("input = seed %d\n",seed); else printf("inputs = seeds %d-%d\n",seed,seed+numSeeds-1); } if 
(outfile) printf("output = %s\n",outfile); else printf("output = none\n"); if (!strcmp(prm.method,"als")) { printf("tol = %1.2e\n",tol); printf("alpha = %1.2e\n",prm.alpha); printf("maval = %1.2e\n",prm.rnd_maxVal); printf("M's = (%d,%d,%d)\n",prm.M[0],prm.M[1],prm.M[2]); printf("maxiters = %d\n",maxIters); printf("maxsecs = %d\n",maxSecs); printf("printitn = %d\n",printItn); printf("printtol = %1.2e\n",printTol); } printf("---------------------------------------------------------\n"); } // Initialize other variables int i, j, k, numIters, mkn, tidx[3]; double err, errOld, errChange = 0.0, start_als, start_search, elapsed, threshold; // Compute tensor dimensions prm.dims[0] = prm.matDims[0]*prm.matDims[1]; prm.dims[1] = prm.matDims[1]*prm.matDims[2]; prm.dims[2] = prm.matDims[0]*prm.matDims[2]; // Compute tensor's nnz, total number of entries, and Frobenius norm mkn = prm.matDims[0]*prm.matDims[1]*prm.matDims[2]; prm.mkn2 = mkn*mkn; prm.xNorm = sqrt(mkn); // Compute number of columns in matricized tensors for (i = 0; i < 3; i++) prm.mtCols[i] = prm.mkn2 / prm.dims[i]; // Construct three matricizations of matmul tensor prm.X = (double**) malloc( 3 * sizeof(double*) ); for (i = 0; i < 3; i++) prm.X[i] = (double*) calloc( prm.mkn2, sizeof(double) ); for (int mm = 0; mm < prm.matDims[0]; mm++) for (int kk = 0; kk < prm.matDims[1]; kk++) for (int nn = 0; nn < prm.matDims[2]; nn++) { tidx[0] = mm + kk*prm.matDims[0]; tidx[1] = kk + nn*prm.matDims[1]; tidx[2] = mm + nn*prm.matDims[0]; prm.X[0][tidx[0]+prm.dims[0]*(tidx[1]+prm.dims[1]*tidx[2])] = 1; prm.X[1][tidx[1]+prm.dims[1]*(tidx[0]+prm.dims[0]*tidx[2])] = 1; prm.X[2][tidx[2]+prm.dims[2]*(tidx[0]+prm.dims[0]*tidx[1])] = 1; } // Allocate factor weights and matrices: working, initial, and model prm.lambda = (double*) malloc( prm.rank * sizeof(double) ); prm.U = (double**) malloc( 3 * sizeof(double*) ); double** U0 = (double**) malloc( 3 * sizeof(double*) ); prm.model = (double**) malloc( 3 * sizeof(double*) ); for (i = 
0; i < 3; i++) { prm.U[i] = (double*) calloc( prm.mkn2, sizeof(double) ); U0[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) ); prm.model[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) ); } // Allocate coefficient matrix within ALS (Khatri-Rao product) int maxMatDim = prm.matDims[0]; if (maxMatDim < prm.matDims[1]) maxMatDim = prm.matDims[1]; if (maxMatDim < prm.matDims[2]) maxMatDim = prm.matDims[2]; prm.A = (double*) malloc( maxMatDim*mkn*prm.rank * sizeof(double) ); // Allocate workspaces prm.tau = (double*) malloc( mkn * sizeof(double) ); prm.lwork = maxMatDim*mkn*prm.rank; prm.work = (double*) malloc( prm.lwork * sizeof(double) ); prm.iwork = (int*) malloc( prm.mkn2 * sizeof(int) ); // Allocate matrices for normal equations int maxDim = prm.dims[0]; if (maxDim < prm.dims[1]) maxDim = prm.dims[1]; if (maxDim < prm.dims[2]) maxDim = prm.dims[2]; prm.NE_coeff = (double*) malloc( prm.rank*prm.rank * sizeof(double) ); prm.NE_rhs = (double*) malloc( maxDim*prm.rank * sizeof(double) ); prm.residual = (double*) malloc( prm.mkn2 * sizeof(double) ); //-------------------------------------------------- // Search Loop //-------------------------------------------------- int mySeed = seed, numGoodSeeds = 0, statusCnt = 0, status = 1; start_search = wall_time(); for (int seed_cnt = 0; seed_cnt < numSeeds; ++seed_cnt) { // Set starting point from random seed (match Matlab Tensor Toolbox) RandomMT cRMT(mySeed); for (i = 0; i < 3; i++) for (j = 0; j < prm.dims[i]; j++) for (k = 0; k < prm.rank; k++) U0[i][j+k*prm.dims[i]] = cRMT.genMatlabMT(); for (i = 0; i < prm.rank; i++) prm.lambda[i] = 1.0; // Copy starting point for (i = 0; i < 3; i++) cblas_dcopy(prm.dims[i]*prm.rank,U0[i],1,prm.U[i],1); // read from file if input is given if( infile ) read_input( infile, prm ); if (verbose) { printf("\nSTARTING POINT...\n"); for (i = 0; i < 3; i++) { printf("Factor matrix %d:\n",i); print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]); } printf("\n"); } 
//-------------------------------------------------- // Main ALS Loop //-------------------------------------------------- start_als = wall_time(); err = 1.0; threshold = 1e-4; for (numIters = 0; numIters < maxIters && (wall_time()-start_als) < maxSecs; numIters++) { errOld = err; if (!strcmp(prm.method,"als")) { // Perform an iteration of ALS using NE with Smirnov's penalty term err = als( prm ); } else if (!strcmp(prm.method,"sparsify")) { // print stats before sparsifying printf("Old residual: %1.2e\n",compute_residual(prm,2,true)); printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); // sparsify and return printf("\nSparsifying...\n\n"); sparsify( prm ); numIters = maxIters; // print stats after sparsifying printf("New residual: %1.2e\n",compute_residual(prm,2,true)); printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); } else if (!strcmp(prm.method,"round")) { // print stats before rounding printf("Old residual: %1.2e\n",compute_residual(prm,2,true)); printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); // round and return for (i = 0; i < 3; i++) { capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal); rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo); } numIters = maxIters; // print stats after rounding printf("New residual: %1.2e\n",compute_residual(prm,2,true)); printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); } else die("Invalid method\n"); // Compute change in 
relative residual norm errChange = fabs(err - errOld); // Print info at current iteration if ((printItn > 0) && (((numIters + 1) % printItn) == 0)) { // print info printf ("Iter %d: residual = %1.5e change = %1.5e\n", numIters + 1, err, errChange); } // Check for convergence if ( numIters > 0 && errChange < tol ) break; } // If rounding, round final solution and re-compute residual if(roundFinal) { // normalize columns in A and B factors, put arbitrary weights into C normalize_model( prm, 2 ); // cap large values and round to nearest power of 2 for (i = 0; i < 3; i++) { capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal); rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo); } err = compute_residual(prm,0,true); } // Print status if searching over many seeds statusCnt++; if (numSeeds > 1000 && statusCnt == numSeeds/10) { printf("...%d%% complete...\n",10*status); status++; statusCnt = 0; } // Print final info elapsed = wall_time() - start_als; if ((printItn > 0 || verbose) && !strcmp(prm.method,"als")) { if (infile) printf("\nInput %s ",infile); else printf("\nInitial seed %d ",mySeed); printf("achieved residual %1.3e in %d iterations and %1.3e seconds\n \t final residual change: %1.3e\n \t average time per iteration: %1.3e s\n", err, numIters, elapsed, errChange, elapsed/numIters); } if (verbose) { printf("\nSOLUTION...\n"); for (i = 0; i < 3; i++) { printf("Factor matrix %d:\n",i); if (roundFinal || !strcmp(prm.method,"round")) print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo); else print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]); } if (err < printTol) numGoodSeeds++; } else if (err < printTol) { numGoodSeeds++; printf("\n\n***************************************\n"); if (infile) printf("Input %s: ",infile); else printf("Initial seed %d: ",mySeed); printf("after %d iterations, achieved residual %1.3e with final residual change of %1.3e\n", numIters, err, errChange); if (roundFinal) { for (i = 0; i < 3; i++) { 
printf("Factor matrix %d:\n",i); print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo); } int count = 0; for (i = 0; i < 3; i++) count += nnz(prm.U[i],prm.dims[i]*prm.rank); printf("\ttotal nnz in solution: %d\n",count); printf("\tnaive adds/subs: %d\n",count - prm.dims[2] - 2*prm.rank); } printf("***************************************\n\n\n"); } // write to output if( outfile ) write_output( outfile, prm ); mySeed++; } // Final report of processor statistics elapsed = wall_time()-start_search; // Print stats if (!strcmp(prm.method,"als")) { printf("\n\n------------------------------------------------------------\n"); printf("Time elapsed: \t%1.1e\tseconds\n",elapsed); printf("Total number of seeds tried: \t%d\n",numSeeds); printf("Total number of good seeds: \t%d",numGoodSeeds); printf("\t(residual < %2.1e)\n",printTol); printf("------------------------------------------------------------\n"); } // free allocated memory for (i = 0; i < 3; i++) { free( prm.X[i] ); free( prm.U[i] ); free( U0[i] ); free( prm.model[i] ); } free( prm.X ); free( prm.U ); free( U0 ); free( prm.model ); free( prm.lambda ); free( prm.A ); free( prm.NE_coeff ); free( prm.NE_rhs ); free( prm.residual ); free( prm.tau ); free( prm.work ); free( prm.iwork ); return 0; }