Exemple #1
0
/*
 * Compute the mean of an array that is distributed over the ranks of `comm`.
 *
 *   data_local   - this rank's elements; their type is given by `type`
 *   result       - receives one value: a double when `type` is SID_DOUBLE or
 *                  `mode` carries CALC_MODE_RETURN_DOUBLE, otherwise a value
 *                  of `type`
 *   n_data_local - number of elements held by this rank
 *   type         - SID_DOUBLE, SID_FLOAT, SID_INT, SID_UNSIGNED or SID_SIZE_T
 *   mode         - bit field; CALC_MODE_ABS is forwarded to calc_sum_global()
 *                  and CALC_MODE_RETURN_DOUBLE selects a double result
 *   comm         - communicator used for both reductions
 *
 * The global element count is reduced first; if it is zero the result is set
 * to zero and the data itself is never summed.  An unsupported `type` traps
 * with ERROR_LOGIC.
 */
void calc_mean_global(void   *data_local,
                      void   *result,
	              size_t  n_data_local,
                      SID_Datatype type,
                      int          mode,
                      SID_Comm    *comm){
  double mean  =0.;
  size_t n_data=0;
  int    flag_abs;
  int    flag_return_double;

  if(check_mode_for_flag(mode,CALC_MODE_ABS))
    flag_abs=CALC_MODE_ABS;
  else
    flag_abs=FALSE;

  // The same rule must decide the width of the write for empty and non-empty
  //   data, otherwise a caller asking for a double gets a narrower store.
  flag_return_double=(type==SID_DOUBLE || check_mode_for_flag(mode,CALC_MODE_RETURN_DOUBLE));

  // Global element count (identical on every rank, so all ranks take the same path below)
  calc_sum_global(&n_data_local,&n_data,1,SID_SIZE_T,CALC_MODE_DEFAULT,comm);

  // Only sum the data when there is something to average; the mean stays 0 otherwise
  if(n_data>=1){
    calc_sum_global(data_local,&mean,n_data_local,type,CALC_MODE_RETURN_DOUBLE|flag_abs,comm);
    mean/=(double)n_data;
  }

  // Store the result with the requested type
  if(flag_return_double)
    ((double *)result)[0]=mean;
  else if(type==SID_FLOAT)
    ((float  *)result)[0]=(float)mean;
  else if(type==SID_INT)
    ((int    *)result)[0]=(int)mean;
  else if(type==SID_UNSIGNED)
    ((unsigned int *)result)[0]=(unsigned int)mean;
  else if(type==SID_SIZE_T)
    ((size_t *)result)[0]=(size_t)mean;
  else
    SID_trap_error("Unknown variable type in calc_mean_global",ERROR_LOGIC);
}
Exemple #2
0
/*
 * Build a per-species integer mark array flagging the local particles that
 * fall inside a volume, store it in the particle list, and return the number
 * of marked particles summed over SID_COMM_WORLD.
 *
 *   plist      - particle list; per-species counts are read from "n_<species>"
 *                and positions from "x_/y_/z_<species>"
 *   run_mode   - bit field; VOLUME_SPHERE or VOLUME_BOX selects the test
 *   input_vals - VOLUME_SPHERE: [0..2] are subtracted from x,y,z and the
 *                add_quad() result is compared against [3];
 *                VOLUME_BOX: [0],[1] bound x, [2],[3] bound y, [4],[5] bound z
 *                (bounds are inclusive)
 *   mark_name  - prefix of the stored array's name: "<mark_name>_<species>"
 *
 * Species without an "n_all_<species>" entry, or with no local particles, are
 * skipped and get no mark array.  When neither volume flag is set nothing is
 * marked (marking by property is not implemented), but an all-false array is
 * still stored.
 */
size_t mark_particles(plist_info *plist, int run_mode, double *input_vals, const char *mark_name) {
    size_t n_marked_local = 0;
    size_t n_marked       = 0;

    // Interpret run-mode
    int flag_volume =
            SID_CHECK_BITFIELD_SWITCH(run_mode, VOLUME_BOX) || SID_CHECK_BITFIELD_SWITCH(run_mode, VOLUME_SPHERE);
    int flag_volume_sphere = SID_CHECK_BITFIELD_SWITCH(run_mode, VOLUME_SPHERE);

    // Loop over all species
    for(int i_species = 0; i_species < N_GADGET_TYPE; i_species++) {
        if(!ADaPS_exist(plist->data, "n_all_%s", plist->species[i_species]))
            continue;
        size_t n_particles_local = ((size_t *)ADaPS_fetch(plist->data, "n_%s", plist->species[i_species]))[0];

        // Nothing to do (and nothing gets stored) if this species has no local particles
        if(n_particles_local == 0)
            continue;

        // Start from an all-false array so that every element is defined,
        //   including on the (currently empty) mark-by-property path.
        int *mark_array = (int *)SID_malloc(sizeof(int) * n_particles_local);
        for(size_t i_particle = 0; i_particle < n_particles_local; i_particle++)
            mark_array[i_particle] = GBP_FALSE;

        // Mark particles in a volume
        if(flag_volume) {
            GBPREAL *x = (GBPREAL *)ADaPS_fetch(plist->data, "x_%s", plist->species[i_species]);
            GBPREAL *y = (GBPREAL *)ADaPS_fetch(plist->data, "y_%s", plist->species[i_species]);
            GBPREAL *z = (GBPREAL *)ADaPS_fetch(plist->data, "z_%s", plist->species[i_species]);
            for(size_t i_particle = 0; i_particle < n_particles_local; i_particle++) {
                // A plain truth test is used (not a switch on GBP_TRUE/GBP_FALSE) so that
                //   any non-zero flag value selects the sphere test.
                if(flag_volume_sphere) {
                    if(add_quad(3,
                                (double)(x[i_particle]) - input_vals[0],
                                (double)(y[i_particle]) - input_vals[1],
                                (double)(z[i_particle]) - input_vals[2]) <= input_vals[3])
                        mark_array[i_particle] = GBP_TRUE;
                } else {
                    if(x[i_particle] >= (GBPREAL)input_vals[0] && x[i_particle] <= (GBPREAL)input_vals[1] &&
                       y[i_particle] >= (GBPREAL)input_vals[2] && y[i_particle] <= (GBPREAL)input_vals[3] &&
                       z[i_particle] >= (GBPREAL)input_vals[4] && z[i_particle] <= (GBPREAL)input_vals[5])
                        mark_array[i_particle] = GBP_TRUE;
                }
            }
        }
        // Mark particles by property: not implemented; the array stays all-false.

        // Tally the local marks and hand the array over to the particle list
        for(size_t i_particle = 0; i_particle < n_particles_local; i_particle++)
            if(mark_array[i_particle])
                n_marked_local++;
        ADaPS_store(&(plist->data), (void *)mark_array, "%s_%s", ADaPS_DEFAULT, mark_name, plist->species[i_species]);
    }

    // Every rank takes part in the reduction, whether or not it marked anything
    calc_sum_global(&n_marked_local, &n_marked, 1, SID_SIZE_T, CALC_MODE_DEFAULT, SID_COMM_WORLD);
    return (n_marked);
}
Exemple #3
0
/*
 * Distribute a set of local particles onto a slab-decomposed grid using one
 * of several assignment kernels, exchange the contributions that fall onto
 * neighbouring slabs, and optionally normalize the result.
 *
 *   n_particles_local      - number of particles held by this rank
 *   x/y/z_particles_local  - particle positions; each is divided by
 *                            field->dR[] to obtain grid coordinates
 *   v_particles_local      - per-particle values, or NULL for a value of 1
 *   w_particles_local      - per-particle weights, or NULL for a weight of 1
 *   cosmo                  - only used to fetch "h_Hubble" (value unused here)
 *   redshift               - not used by this function
 *   distribution_scheme    - MAP2GRID_DIST_DWT20, _DWT12, _TSC, _CIC or _NGP;
 *                            anything else is treated as NGP
 *   normalization_constant - factor applied under MAP2GRID_MODE_APPLYFACTOR,
 *                            or target grid sum under MAP2GRID_MODE_FORCENORM
 *   field                  - receives the sum of kernel*value*weight
 *   field_norm             - optional (may be NULL); receives the sum of
 *                            kernel*weight and must match `field`'s geometry
 *   mode                   - bit field of MAP2GRID_MODE_* switches:
 *                            NOCLEAN skips zeroing the fields, NONORM skips
 *                            the whole normalization stage
 *
 * The FFT padding state of both fields is switched off as a side effect.
 */
void map_to_grid(size_t      n_particles_local,
                 GBPREAL *   x_particles_local,
                 GBPREAL *   y_particles_local,
                 GBPREAL *   z_particles_local,
                 GBPREAL *   v_particles_local,
                 GBPREAL *   w_particles_local,
                 cosmo_info *cosmo,
                 double      redshift,
                 int         distribution_scheme,
                 double      normalization_constant,
                 field_info *field,
                 field_info *field_norm,
                 int         mode) {
    size_t       i_p;
    size_t       i_b;
    size_t       i_grid;
    int          i_coord;
    int          i_i[3];
    int          j_i[3];
    int          k_i[3];
    size_t       n_particles = 0;
    double       v_p         = 1.; // used as-is when no values are given
    double       w_p         = 1.; // used as-is when no weights are given
    int          flag_valued_particles;
    int          flag_weight_particles;
    int          flag_active;
    int          flag_viable;
    GBPREAL      x_i;
    GBPREAL      r_particle[3];
    double       kernal_offset = 0.; // only meaningful for the Daubechies kernels
    int          W_search_lo;
    int          W_search_hi;
    size_t       receive_left_size  = 0;
    size_t       receive_right_size = 0;
    size_t       n_send_left;
    size_t       n_send_right;
    size_t       send_size_left;
    size_t       send_size_right;
    GBPREAL *    send_left          = NULL;
    GBPREAL *    send_right         = NULL;
    GBPREAL *    receive_left       = NULL;
    GBPREAL *    receive_right      = NULL;
    GBPREAL *    send_left_norm     = NULL;
    GBPREAL *    send_right_norm    = NULL;
    GBPREAL *    receive_left_norm  = NULL;
    GBPREAL *    receive_right_norm = NULL;
    double       W_i;
    double *     r_Daub;
    double *     W_Daub;
    double       h_Hubble;
    int          n_Daub;
    interp_info *W_r_Daub_interp = NULL;

    // Compute the total population size and print a status message
    calc_sum_global(&n_particles_local, &n_particles, 1, SID_SIZE_T, CALC_MODE_DEFAULT, SID_COMM_WORLD);
    SID_log("Distributing %zu items onto a %dx%dx%d grid...", SID_LOG_OPEN, n_particles, field->n[0], field->n[1], field->n[2]);

    // If we've been given a normalization field, make sure it's got the same geometry as the results field
    // NOTE(review): the format specifiers below (%le for the start/stop positions, %d for the
    //               size fields) cannot be checked against the field_info member types from
    //               here -- confirm that they match.
    if(field_norm != NULL) {
        if(field->n_d != field_norm->n_d)
            SID_exit_error("grid dimension counts don't match (ie. %d!=%d)", SID_ERROR_LOGIC, field->n_d,
                           field_norm->n_d);
        int i_d;
        for(i_d = 0; i_d < field->n_d; i_d++) {
            if(field->n[i_d] != field_norm->n[i_d])
                SID_exit_error("grid dimension No. %d's sizes don't match (ie. %d!=%d)", SID_ERROR_LOGIC, i_d,
                               field->n[i_d], field_norm->n[i_d]);
            if(field->n_R_local[i_d] != field_norm->n_R_local[i_d])
                SID_exit_error("grid dimension No. %d's slab sizes don't match (ie. %d!=%d)", SID_ERROR_LOGIC, i_d,
                               field->n_R_local[i_d], field_norm->n_R_local[i_d]);
            if(field->i_R_start_local[i_d] != field_norm->i_R_start_local[i_d])
                SID_exit_error("grid dimension No. %d's start positions don't match (ie. %le!=%le)", SID_ERROR_LOGIC,
                               i_d, field->i_R_start_local[i_d], field_norm->i_R_start_local[i_d]);
            if(field->i_R_stop_local[i_d] != field_norm->i_R_stop_local[i_d])
                SID_exit_error("grid dimension No. %d's stop positions don't match (ie. %le!=%le)", SID_ERROR_LOGIC,
                               i_d, field->i_R_stop_local[i_d], field_norm->i_R_stop_local[i_d]);
        }
        if(field->n_field != field_norm->n_field)
            SID_exit_error("grid field sizes don't match (ie. %d!=%d)", SID_ERROR_LOGIC, field->n_field,
                           field_norm->n_field);
        if(field->n_field_R_local != field_norm->n_field_R_local)
            SID_exit_error("grid local field sizes don't match (ie. %d!=%d)", SID_ERROR_LOGIC, field->n_field_R_local,
                           field_norm->n_field_R_local);
        if(field->total_local_size != field_norm->total_local_size)
            SID_exit_error("grid total local sizes don't match (ie. %d!=%d)", SID_ERROR_LOGIC, field->total_local_size,
                           field_norm->total_local_size);
    }

    // Set some variables
    flag_valued_particles = (v_particles_local != NULL) ? GBP_TRUE : GBP_FALSE;
    flag_weight_particles = (w_particles_local != NULL) ? GBP_TRUE : GBP_FALSE;
    // NOTE(review): h_Hubble is not used below.  The fetch is kept in case callers rely on
    //               it to reject a cosmology without this entry -- confirm, then remove.
    h_Hubble = ((double *)ADaPS_fetch(cosmo, "h_Hubble"))[0];
    (void)h_Hubble;

    // Initializing the mass assignment scheme
    //   W_search_lo/hi give the number of cells searched below/above the particle's cell.
    switch(distribution_scheme) {
        case MAP2GRID_DIST_DWT20:
            W_search_lo   = 2;
            W_search_hi   = 7;
            kernal_offset = 2.5;
            compute_Daubechies_scaling_fctns(20, 5, &r_Daub, &W_Daub, &n_Daub);
            init_interpolate(r_Daub, W_Daub, n_Daub, gsl_interp_cspline, &W_r_Daub_interp);
            SID_free(SID_FARG r_Daub);
            SID_free(SID_FARG W_Daub);
            SID_log("(using D20 scale function kernal)...", SID_LOG_CONTINUE);
            break;
        case MAP2GRID_DIST_DWT12:
            W_search_lo   = 1;
            W_search_hi   = 6;
            kernal_offset = 1.75;
            compute_Daubechies_scaling_fctns(12, 5, &r_Daub, &W_Daub, &n_Daub);
            init_interpolate(r_Daub, W_Daub, (size_t)n_Daub, gsl_interp_cspline, &W_r_Daub_interp);
            SID_free(SID_FARG r_Daub);
            SID_free(SID_FARG W_Daub);
            SID_log("(using D12 scale function kernal)...", SID_LOG_CONTINUE);
            break;
        case MAP2GRID_DIST_TSC:
            W_search_lo = 2;
            W_search_hi = 2;
            SID_log("(using triangular shaped function kernal)...", SID_LOG_CONTINUE);
            break;
        case MAP2GRID_DIST_CIC:
            // Same search range as NGP, but with its own case so that only one kernel is reported
            W_search_lo = 1;
            W_search_hi = 1;
            SID_log("(using cloud-in-cell kernal)...", SID_LOG_CONTINUE);
            break;
        case MAP2GRID_DIST_NGP:
        default:
            W_search_lo = 1;
            W_search_hi = 1;
            SID_log("(using nearest grid point kernal)...", SID_LOG_CONTINUE);
            break;
    }

    // Initializing slab buffers
    //   Sizes are formed in size_t so that large grids do not overflow an int product.
    //   Each receive buffer is sized like the opposite send buffer, presumably because
    //   it holds what the neighbour sent from its other side.
    n_send_left     = (size_t)field->n[0] * (size_t)field->n[1] * (size_t)W_search_lo;
    n_send_right    = (size_t)field->n[0] * (size_t)field->n[1] * (size_t)W_search_hi;
    send_size_left  = n_send_left * sizeof(GBPREAL);
    send_size_right = n_send_right * sizeof(GBPREAL);
    send_left       = (GBPREAL *)SID_calloc(send_size_left);
    send_right      = (GBPREAL *)SID_calloc(send_size_right);
    receive_left    = (GBPREAL *)SID_calloc(send_size_right);
    receive_right   = (GBPREAL *)SID_calloc(send_size_left);
    if(field_norm != NULL) {
        send_left_norm     = (GBPREAL *)SID_calloc(send_size_left);
        send_right_norm    = (GBPREAL *)SID_calloc(send_size_right);
        receive_left_norm  = (GBPREAL *)SID_calloc(send_size_right);
        receive_right_norm = (GBPREAL *)SID_calloc(send_size_left);
    }

    // Clear the fields
    if(!SID_CHECK_BITFIELD_SWITCH(mode, MAP2GRID_MODE_NOCLEAN)) {
        SID_log("Clearing fields...", SID_LOG_OPEN);
        clear_field(field);
        if(field_norm != NULL)
            clear_field(field_norm);
        SID_log("Done.", SID_LOG_CLOSE);
    }

    // It is essential that we not pad the field for the simple way that we add-in the boundary buffers below
    set_FFT_padding_state(field, GBP_FALSE);
    if(field_norm != NULL)
        set_FFT_padding_state(field_norm, GBP_FALSE);

    // Create the mass distribution
    SID_log("Performing grid assignment...", SID_LOG_OPEN | SID_LOG_TIMER);

    // Loop over all the objects
    pcounter_info pcounter;
    SID_Init_pcounter(&pcounter, n_particles_local, 10);
    for(i_p = 0; i_p < n_particles_local; i_p++) {
        double norm_i;
        double value_i;
        if(flag_valued_particles)
            v_p = (double)(v_particles_local[i_p]);
        if(flag_weight_particles)
            w_p = (double)(w_particles_local[i_p]);
        norm_i  = w_p;
        value_i = v_p * norm_i;

        // Particle's position ...
        r_particle[0] = (GBPREAL)x_particles_local[i_p];
        r_particle[1] = (GBPREAL)y_particles_local[i_p];
        r_particle[2] = (GBPREAL)z_particles_local[i_p];

        // ... quantized onto the grid
        for(i_coord = 0; i_coord < 3; i_coord++) {
            r_particle[i_coord] /= (GBPREAL)field->dR[i_coord];
            i_i[i_coord] = (int)r_particle[i_coord]; // position in grid-coordinates
        }

        // Apply the kernel
        flag_viable = GBP_TRUE; // NGP only: cleared once the particle has been deposited somewhere
        for(j_i[0] = -W_search_lo; j_i[0] <= W_search_hi; j_i[0]++) {
            for(j_i[1] = -W_search_lo; j_i[1] <= W_search_hi; j_i[1]++) {
                for(j_i[2] = -W_search_lo; j_i[2] <= W_search_hi; j_i[2]++) {
                    // Compute the separable kernel weight for this grid point, one axis at a time ...
                    flag_active = GBP_TRUE;
                    for(i_coord = 0, W_i = 1.; i_coord < 3; i_coord++) {
                        // Signed offset (in cells) from the particle to the grid point along this axis
                        x_i = (GBPREAL)(i_i[i_coord] + j_i[i_coord]) - r_particle[i_coord];
                        switch(distribution_scheme) {
                                // Distribute with a Daubechies wavelet transform of 12th or 20th order a la Cui et al '08
                            case MAP2GRID_DIST_DWT12:
                            case MAP2GRID_DIST_DWT20: {
                                double x_i_effective = x_i + kernal_offset;
                                if(x_i_effective > 0.)
                                    W_i *= interpolate(W_r_Daub_interp, x_i_effective);
                                else
                                    flag_active = GBP_FALSE;
                                break;
                            }
                                // Distribute using the triangular shaped cloud (TSC) method
                                //   The kernel is symmetric, so the branch tests must use |x_i|;
                                //   testing the signed offset gives negative weights on the low side.
                            case MAP2GRID_DIST_TSC:
                                if(fabs(x_i) < 0.5)
                                    W_i *= (0.75 - x_i * x_i);
                                else if(fabs(x_i) < 1.5)
                                    W_i *= 0.5 * (1.5 - fabs(x_i)) * (1.5 - fabs(x_i));
                                else
                                    flag_active = GBP_FALSE;
                                break;
                                // Distribute using the cloud-in-cell (CIC) method
                            case MAP2GRID_DIST_CIC:
                                if(fabs(x_i) < 1.)
                                    W_i *= (1. - fabs(x_i));
                                else
                                    flag_active = GBP_FALSE;
                                break;
                                // Distribute using "nearest grid point" (NGP; ie. the simplest and default) method
                            case MAP2GRID_DIST_NGP:
                            default:
                                if(fabs(x_i) <= 0.5 && flag_viable)
                                    W_i *= 1.;
                                else
                                    flag_active = GBP_FALSE;
                                break;
                        }
                    }
                    if(flag_active) { // This flags-out regions of the kernal with no support to save some time
                        // Set the grid indices (enforce periodic BCs; do x-coordinate last) ...
                        //   ... y-coordinate ...
                        k_i[1] = (i_i[1] + j_i[1]);
                        if(k_i[1] < 0)
                            k_i[1] += field->n[1];
                        else
                            k_i[1] = k_i[1] % field->n[1];
                        //   ... z-coordinate ...
                        k_i[2] = i_i[2] + j_i[2];
                        if(k_i[2] < 0)
                            k_i[2] += field->n[2];
                        else
                            k_i[2] = k_i[2] % field->n[2];
                        //   ... x-coordinate ...
                        //     Depending on x-index, add contribution to the
                        //     local array or to the slab buffers.
                        k_i[0] = (i_i[0] + j_i[0]);
                        if(k_i[0] < field->i_R_start_local[0]) {
                            k_i[0] -= (field->i_R_start_local[0] - W_search_lo);
                            if(k_i[0] < 0)
                                SID_exit_error("Left slab buffer limit exceeded by %d element(s).", SID_ERROR_LOGIC,
                                               -k_i[0]);
                            send_left[index_FFT_R(field, k_i)] += W_i * value_i;
                            if(field_norm != NULL)
                                send_left_norm[index_FFT_R(field_norm, k_i)] += W_i * norm_i;
                        } else if(k_i[0] > field->i_R_stop_local[0]) {
                            k_i[0] -= (field->i_R_stop_local[0] + 1);
                            if(k_i[0] >= W_search_hi)
                                SID_exit_error("Right slab buffer limit exceeded by %d element(s).", SID_ERROR_LOGIC,
                                               k_i[0] - W_search_hi + 1);
                            else {
                                send_right[index_FFT_R(field, k_i)] += W_i * value_i;
                                if(field_norm != NULL)
                                    send_right_norm[index_FFT_R(field_norm, k_i)] += W_i * norm_i;
                            }
                        } else {
                            field->field_local[index_local_FFT_R(field, k_i)] += W_i * value_i;
                            if(field_norm != NULL)
                                field_norm->field_local[index_local_FFT_R(field_norm, k_i)] += W_i * norm_i;
                        }
                        flag_viable = GBP_FALSE;
                    }
                }
            }
        }
        // Report the calculation's progress
        SID_check_pcounter(&pcounter, i_p);
    }
    SID_log("Done.", SID_LOG_CLOSE);

    // Perform exchange of slab buffers and add them to the local mass distribution.
    //    Note: it's important that the FFT field not be padded (see above, where
    //          this is set) for this to work the way it's done.
    SID_log("Adding-in the slab buffers...", SID_LOG_OPEN | SID_LOG_TIMER);
    // Numerator first ...
    exchange_slab_buffer_left(send_left, send_size_left, receive_right, &receive_right_size, &(field->slab));
    exchange_slab_buffer_right(send_right, send_size_right, receive_left, &receive_left_size, &(field->slab));
    for(i_b = 0; i_b < n_send_right; i_b++)
        field->field_local[i_b] += receive_left[i_b];
    for(i_b = 0; i_b < n_send_left; i_b++)
        field->field_local[field->n_field_R_local - n_send_left + i_b] += receive_right[i_b];
    // ... then denominator (if it's being used); it must be built from its own receive buffers
    if(field_norm != NULL) {
        exchange_slab_buffer_left(send_left_norm, send_size_left, receive_right_norm, &receive_right_size, &(field_norm->slab));
        exchange_slab_buffer_right(send_right_norm, send_size_right, receive_left_norm, &receive_left_size, &(field_norm->slab));
        for(i_b = 0; i_b < n_send_right; i_b++)
            field_norm->field_local[i_b] += receive_left_norm[i_b];
        for(i_b = 0; i_b < n_send_left; i_b++)
            field_norm->field_local[field_norm->n_field_R_local - n_send_left + i_b] += receive_right_norm[i_b];
    }
    SID_free(SID_FARG send_left);
    SID_free(SID_FARG send_right);
    SID_free(SID_FARG receive_left);
    SID_free(SID_FARG receive_right);
    if(field_norm != NULL) {
        SID_free(SID_FARG send_left_norm);
        SID_free(SID_FARG send_right_norm);
        SID_free(SID_FARG receive_left_norm);
        SID_free(SID_FARG receive_right_norm);
    }
    SID_log("Done.", SID_LOG_CLOSE);

    // Recompute local normalization (more accurate for large sample sizes)
    if(!SID_CHECK_BITFIELD_SWITCH(mode, MAP2GRID_MODE_NONORM)) {
        SID_log("Applying normalization...", SID_LOG_OPEN);
        // Divide by the weight field wherever it is non-zero
        if(field_norm != NULL) {
            for(i_grid = 0; i_grid < field->n_field_R_local; i_grid++) {
                if(field_norm->field_local[i_grid] != 0)
                    field->field_local[i_grid] /= field_norm->field_local[i_grid];
            }
        }
        // Scale by a fixed factor
        if(SID_CHECK_BITFIELD_SWITCH(mode, MAP2GRID_MODE_APPLYFACTOR)) {
            for(i_grid = 0; i_grid < field->n_field_R_local; i_grid++)
                field->field_local[i_grid] *= normalization_constant;
        }
        // Rescale so that the global grid sum equals normalization_constant
        if(SID_CHECK_BITFIELD_SWITCH(mode, MAP2GRID_MODE_FORCENORM)) {
            double norm_local    = 0.;
            double normalization = 0.;
            for(i_grid = 0; i_grid < field->n_field_R_local; i_grid++)
                norm_local += (double)field->field_local[i_grid];
            calc_sum_global(&norm_local, &normalization, 1, SID_DOUBLE, CALC_MODE_DEFAULT, SID_COMM_WORLD);
            double normalization_factor;
            normalization_factor = normalization_constant / normalization;
            for(i_grid = 0; i_grid < field->n_field_R_local; i_grid++)
                field->field_local[i_grid] *= normalization_factor;
        }
        SID_log("Done.", SID_LOG_CLOSE);
    }

    if(W_r_Daub_interp != NULL)
        free_interpolate(SID_FARG W_r_Daub_interp, NULL);

    SID_log("Done.", SID_LOG_CLOSE);
}
void write_match_results(char *      filename_out_dir,
                         char *      filename_out_root,
                         int         i_read,
                         int         j_read,
                         const char *filename_cat1,
                         const char *filename_cat2,
                         plist_info *plist1,
                         plist_info *plist2,
                         int         k_match,
                         float       match_weight_rank_index,
                         int         mode) {
    char    filename_out[256];
    char    filename_out_dir_snap[256];
    FILE *  fp_out;
    int     k_read, l_read;
    int     flag_go;
    int     i_read_start_file;
    int     i_read_stop_file;
    int     i_read_step_file;
    int     n_search_file;
    int     n_search_total;
    int     n_k_match;
    int     flag_match_subgroups;
    char    group_text_prefix[5];
    int     n_matches;
    int     n_files;
    int     n_groups_1;
    int     n_groups_1_local;
    int     n_groups_2;
    int     n_groups_2_local;
    int     i_group;
    int     buffered_count;
    int     buffered_count_local;
    int     j_group;
    int     index_test;
    int     i_rank;
    int *   n_particles;
    int *   n_sub_group;
    int *   file_index_1;
    int *   match_id    = NULL;
    float * match_score = NULL;
    int *   match_count = NULL;
    char    cat_name_1[20];
    char    cat_name_2[20];
    size_t *match_rank  = NULL;
    size_t *match_index = NULL;
    size_t  offset;
    int *   n_return;
    void *  buffer;
    int *   buffer_int;
    size_t *buffer_size_t;
    float * buffer_float;
    int     n_buffer_max = 131072; // 32*32k=1MB for 8-byte values
    int     n_buffer;
    int     i_buffer;
    int     j_buffer;

    switch(k_match) {
        case 0:
            flag_match_subgroups = MATCH_SUBGROUPS;
            sprintf(group_text_prefix, "sub");
            break;
        case 1:
            flag_match_subgroups = MATCH_GROUPS;
            sprintf(group_text_prefix, "");
            break;
    }

    // Intialize filenames
    if(SID_CHECK_BITFIELD_SWITCH(mode, WRITE_MATCHES_MODE_TREES)) {
        sprintf(filename_out_dir_snap, "%s/%s", filename_out_dir, filename_cat1);
        // Create output directory if need-be
        if(filename_out_dir != NULL)
            mkdir(filename_out_dir, 02755);
    } else if(SID_CHECK_BITFIELD_SWITCH(mode, WRITE_MATCHES_MODE_SINGLE))
        sprintf(filename_out_dir_snap, "%s/", filename_out_dir);
    else
        SID_exit_error("Invalid write mode flag (%d).", SID_ERROR_LOGIC, mode);
    if(filename_out_dir != NULL)
        sprintf(filename_out, "%s/%sgroup_matches_%s_%s.dat", filename_out_dir_snap, group_text_prefix, filename_cat1, filename_cat2);
    else
        sprintf(filename_out, "%s_%sgroup_matches_%s_%s.dat", filename_out_root, group_text_prefix, filename_cat1, filename_cat2);

    SID_log("Writing match results to {%s}...", SID_LOG_OPEN | SID_LOG_TIMER, filename_out);

    // Fetch halo counts ...
    n_groups_1 = ((int *)ADaPS_fetch(plist1->data, "n_%sgroups_all_%s", group_text_prefix, filename_cat1))[0];
    n_groups_2 = ((int *)ADaPS_fetch(plist2->data, "n_%sgroups_all_%s", group_text_prefix, filename_cat2))[0];

    // Write header.
    if(SID.I_am_Master) {
        if(filename_out_dir != NULL)
            mkdir(filename_out_dir_snap, 02755);
        if((fp_out = fopen(filename_out, "w")) == NULL)
            SID_exit_error("Could not open {%s} for writing.", SID_ERROR_IO_OPEN, filename_out);
        fwrite(&i_read, sizeof(int), 1, fp_out);
        fwrite(&j_read, sizeof(int), 1, fp_out);
        fwrite(&n_groups_1, sizeof(int), 1, fp_out);
        fwrite(&n_groups_2, sizeof(int), 1, fp_out);
        fwrite(&match_weight_rank_index, sizeof(float), 1, fp_out);
    }

    // Everything else only needs to be written if there are halos to match with
    if(n_groups_1 > 0) {
        // Fetch catalog and matching info ...
        n_groups_1_local = ((int *)ADaPS_fetch(plist1->data, "n_%sgroups_%s", group_text_prefix, filename_cat1))[0];
        file_index_1     = (int *)ADaPS_fetch(plist1->data, "file_index_%sgroups_%s", group_text_prefix, filename_cat1);
        n_groups_2_local = ((int *)ADaPS_fetch(plist2->data, "n_%sgroups_%s", group_text_prefix, filename_cat2))[0];
        match_id         = (int *)ADaPS_fetch(plist1->data, "match_match");
        match_score      = (float *)ADaPS_fetch(plist1->data, "match_score_match");
        match_count      = (int *)ADaPS_fetch(plist1->data, "match_count_match");

        // Generate ranking of matches
        sort(match_id, (size_t)n_groups_1_local, &match_index, SID_INT, SORT_GLOBAL, SORT_COMPUTE_INDEX, SORT_COMPUTE_NOT_INPLACE);
        sort(match_index, (size_t)n_groups_1_local, &match_rank, SID_SIZE_T, SORT_GLOBAL, SORT_COMPUTE_INDEX, SORT_COMPUTE_NOT_INPLACE);
        SID_free(SID_FARG match_index);

        // Now we write the matching results.  We need to write back to the file in the
        //   order that it was read from the halo catalogs, not necessarily the PH order
        //   that it is stored in RAM.  This requires some buffer trickery.
        buffer        = SID_malloc(n_buffer_max * sizeof(size_t));
        buffer_int    = (int *)buffer;
        buffer_size_t = (size_t *)buffer;
        buffer_float  = (float *)buffer;

        // Write match_ids ...
        //    ... loop over all the groups in buffer-sized batches
        SID_log("Writing match IDs...", SID_LOG_OPEN | SID_LOG_TIMER);
        for(i_group = 0, buffered_count_local = 0; i_group < n_groups_1; i_group += n_buffer) {
            // Decide this buffer iteration's size
            n_buffer = GBP_MIN(n_buffer_max, n_groups_1 - i_group);
            // Set the buffer to a default value smaller than the smallest possible data size
            for(i_buffer = 0; i_buffer < n_buffer; i_buffer++)
                buffer_int[i_buffer] = -2; // Min value of match_id is -1
            // Determine if any of the local data is being used for this buffer
            for(j_group = 0; j_group < n_groups_1_local; j_group++) {
                index_test = file_index_1[j_group] - i_group;
                // ... if so, set the appropriate buffer value
                if(index_test >= 0 && index_test < n_buffer) {
                    buffer_int[index_test] = match_id[j_group];
                    buffered_count_local++;
                }
            }
            // Doing a global max on the buffer yields the needed buffer on all ranks
            SID_Allreduce(SID_IN_PLACE, buffer_int, n_buffer, SID_INT, SID_MAX, SID_COMM_WORLD);

            if(SID.I_am_Master) {
                // Sanity check
                for(i_buffer = 0; i_buffer < n_buffer; i_buffer++) {
                    if(buffer_int[i_buffer] < -1 || buffer_int[i_buffer] >= n_groups_2)
                        SID_exit_error(
                                "Illegal match_id result (%d) for group No. %d.  There are %d groups in the target catalog.",
                                SID_ERROR_LOGIC, buffer_int[i_buffer], i_group + i_buffer, n_groups_2);
                }
                // Write the buffer
                fwrite(buffer_int, sizeof(int), (size_t)n_buffer, fp_out);
            }
        }

        // Sanity check
        calc_sum_global(&buffered_count_local, &buffered_count, 1, SID_INT, CALC_MODE_DEFAULT, SID_COMM_WORLD);
        if(buffered_count != n_groups_1)
            SID_exit_error("Buffer counts don't make sense (ie %d!=%d) after writing match IDs.", SID_ERROR_LOGIC,
                           buffered_count, n_groups_1);
        SID_log("Done.", SID_LOG_CLOSE);

        // Write match_score ...
        //    ... loop over all the groups in buffer-sized batches
        SID_log("Writing match scores...", SID_LOG_OPEN | SID_LOG_TIMER);
        for(i_group = 0, buffered_count_local = 0; i_group < n_groups_1; i_group += n_buffer) {
            // Decide this buffer iteration's size
            n_buffer = GBP_MIN(n_buffer_max, n_groups_1 - i_group);
            // Set the buffer to a default value smaller than the smallest possible data size
            for(i_buffer = 0; i_buffer < n_buffer; i_buffer++)
                buffer_float[i_buffer] = -1.; // Min value of match_score is 0.
            // Determine if any of the local data is being used for this buffer
            for(j_group = 0; j_group < n_groups_1_local; j_group++) {
                index_test = file_index_1[j_group] - i_group;
                // ... if so, set the appropriate buffer value
                if(index_test >= 0 && index_test < n_buffer) {
                    buffer_float[index_test] = match_score[j_group];
                    buffered_count_local++;
                }
            }
            // Doing a global max on the buffer yields the needed buffer on all ranks
            SID_Allreduce(SID_IN_PLACE, buffer_float, n_buffer, SID_FLOAT, SID_MAX, SID_COMM_WORLD);

            if(SID.I_am_Master) {
                // Sanity check
                for(i_buffer = 0; i_buffer < n_buffer; i_buffer++) {
                    if(buffer_float[i_buffer] < 0.)
                        SID_exit_error("Illegal match_score result (%f) for group No. %d.", SID_ERROR_LOGIC,
                                       buffer_float[i_buffer], i_group + i_buffer);
                }
                // Write the buffer
                fwrite(buffer, sizeof(float), (size_t)n_buffer, fp_out);
            }
        }

        // Sanity check
        calc_sum_global(&buffered_count_local, &buffered_count, 1, SID_INT, CALC_MODE_DEFAULT, SID_COMM_WORLD);
        if(buffered_count != n_groups_1)
            SID_exit_error("Buffer counts don't make sense (ie %d!=%d) after writing match scores.", SID_ERROR_LOGIC,
                           buffered_count, n_groups_1);
        SID_log("Done.", SID_LOG_CLOSE);

        // Write match_count ...
        //    ... loop over all the groups in buffer-sized batches
        SID_log("Writing match counts...", SID_LOG_OPEN | SID_LOG_TIMER);
        for(i_group = 0, buffered_count_local = 0; i_group < n_groups_1; i_group += n_buffer) {
            // Decide this buffer iteration's size
            n_buffer = GBP_MIN(n_buffer_max, n_groups_1 - i_group);
            // Set the buffer to a default value smaller than the smallest possible data size
            for(i_buffer = 0; i_buffer < n_buffer; i_buffer++)
                buffer_int[i_buffer] = -1; // Min value of match_count is 0.
            // Determine if any of the local data is being used for this buffer
            for(j_group = 0; j_group < n_groups_1_local; j_group++) {
                index_test = file_index_1[j_group] - i_group;
                // ... if so, set the appropriate buffer value
                if(index_test >= 0 && index_test < n_buffer) {
                    buffer_int[index_test] = match_count[j_group];
                    buffered_count_local++;
                }
            }
            // Doing a global max on the buffer yields the needed buffer on all ranks
            SID_Allreduce(SID_IN_PLACE, buffer_int, n_buffer, SID_INT, SID_MAX, SID_COMM_WORLD);

            if(SID.I_am_Master) {
                // Sanity check
                for(i_buffer = 0; i_buffer < n_buffer; i_buffer++) {
                    if(buffer_int[i_buffer] < 0.)
                        SID_exit_error("Illegal match_count result (%f) for group No. %d.", SID_ERROR_LOGIC,
                                       buffer_int[i_buffer], i_group + i_buffer);
                }
                // Write the buffer
                fwrite(buffer, sizeof(int), (size_t)n_buffer, fp_out);
            }
        }

        // Sanity check
        calc_sum_global(&buffered_count_local, &buffered_count, 1, SID_INT, CALC_MODE_DEFAULT, SID_COMM_WORLD);
        if(buffered_count != n_groups_1)
            SID_exit_error("Buffer counts don't make sense (ie %d!=%d) after writing match scores.", SID_ERROR_LOGIC,
                           buffered_count, n_groups_1);
        SID_log("Done.", SID_LOG_CLOSE);

        // Clean-up
        SID_log("Cleaning-up...", SID_LOG_OPEN);
        SID_free(SID_FARG buffer);
        SID_free(SID_FARG match_rank);

        SID_log("Done.", SID_LOG_CLOSE);
    }

    // Close the file
    if(SID.I_am_Master)
        fclose(fp_out);

    SID_log("Done.", SID_LOG_CLOSE);
}