/* PSRFITS writer thread.
 *
 * _args points to a struct guppi_thread_args; the fields read here are
 * `priority` and `input_buffer`.
 *
 * The thread restricts itself to CPU 1, sets its priority, attaches to
 * the status and data-buffer shared memory areas, and then loops over
 * the data blocks in order:
 *   - wait for the current block to be filled,
 *   - read the observation / subint parameters from the block header,
 *   - once a block with packetindex 0 and a valid start time has been
 *     seen, post-process the block and write it with
 *     psrfits_write_subint(), followed by any polyco sets that have not
 *     yet been written to the current file,
 *   - mark the block free and advance to the next one.
 *
 * The loop ends when pf.T exceeds pf.hdr.scanlen (for scanlen > 0) or
 * when the fold output buffer cannot be allocated.  The function never
 * returns normally: it finishes with pthread_exit(), which runs the
 * cleanup handlers registered below.
 */
void guppi_psrfits_thread(void *_args) {
    
    /* Get args */
    struct guppi_thread_args *args = (struct guppi_thread_args *)_args;
    pthread_cleanup_push((void *)guppi_thread_set_finished, args);
    
    /* Set cpu affinity */
    cpu_set_t cpuset, cpuset_orig;
    sched_getaffinity(0, sizeof(cpu_set_t), &cpuset_orig);
    CPU_ZERO(&cpuset);
    CPU_SET(1, &cpuset);
    int rv = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
    if (rv<0) { 
        guppi_error("guppi_psrfits_thread", "Error setting cpu affinity.");
        perror("sched_setaffinity");
    }

    /* Set priority */
    rv = setpriority(PRIO_PROCESS, 0, args->priority);
    if (rv<0) {
        guppi_error("guppi_psrfits_thread", "Error setting priority level.");
        perror("set_priority");
    }
    
    /* Attach to status shared mem area */
    struct guppi_status st;
    rv = guppi_status_attach(&st);
    if (rv!=GUPPI_OK) {
        guppi_error("guppi_psrfits_thread", 
                    "Error attaching to status shared memory.");
        pthread_exit(NULL);
    }
    pthread_cleanup_push((void *)guppi_status_detach, &st);
    pthread_cleanup_push((void *)set_exit_status, &st);
    
    /* Init status */
    guppi_status_lock_safe(&st);
    hputs(st.buf, STATUS_KEY, "init");
    guppi_status_unlock_safe(&st);
    
    /* Initialize some key parameters */
    struct guppi_params gp;
    struct psrfits pf;
    pf.sub.data = NULL;
    pf.sub.dat_freqs = pf.sub.dat_weights = NULL;
    pf.sub.dat_offsets = pf.sub.dat_scales = NULL;
    pf.hdr.chan_dm = 0.0;
    pf.filenum = 0; // This is crucial
    pthread_cleanup_push((void *)guppi_free_psrfits, &pf);
    pthread_cleanup_push((void *)psrfits_close, &pf);
    //pf.multifile = 0;  // Use a single file for fold mode
    pf.multifile = 1;  // Use a multiple files for fold mode
    pf.quiet = 0;      // Print a message per each subint written
    
    /* Attach to databuf shared mem */
    struct guppi_databuf *db;
    db = guppi_databuf_attach(args->input_buffer);
    if (db==NULL) {
        guppi_error("guppi_psrfits_thread",
                    "Error attaching to databuf shared memory.");
        pthread_exit(NULL);
    }
    pthread_cleanup_push((void *)guppi_databuf_detach, db);
    
    /* Loop */
    /* NOTE(review): `run` is a local that nothing in this function
     * clears; whether cc() is meant to affect it cannot be told from
     * here -- confirm. */
    int curblock=0, total_status=0, firsttime=1, run=1, got_packet_0=0;
    int mode=SEARCH_MODE;
    char *ptr;
    char tmpstr[256];
    struct foldbuf fb;
    struct polyco pc[64];  /* Polyco sets already written to the file */
    const int max_pc = (int)(sizeof(pc) / sizeof(pc[0]));
    memset(pc, 0, sizeof(pc));
    int n_polyco_written=0;
    float *fold_output_array = NULL;
    int scan_finished=0;
    signal(SIGINT, cc);
    do {
        /* Note waiting status */
        guppi_status_lock_safe(&st);
        if (got_packet_0)
            snprintf(tmpstr, sizeof(tmpstr), "waiting(%d)", curblock);
        else
            snprintf(tmpstr, sizeof(tmpstr), "ready");
        hputs(st.buf, STATUS_KEY, tmpstr);
        guppi_status_unlock_safe(&st);
        
        /* Wait for buf to have data */
        rv = guppi_databuf_wait_filled(db, curblock);
        if (rv!=0) {
            // This is a big ol' kludge to avoid this process hanging
            // due to thread synchronization problems.
            sleep(1);
            continue; 
        }

        /* Note current block */
        guppi_status_lock_safe(&st);
        hputi4(st.buf, "CURBLOCK", curblock);
        guppi_status_unlock_safe(&st);

        /* See how full databuf is */
        total_status = guppi_databuf_total_status(db);
        
        /* Read param structs for this block */
        ptr = guppi_databuf_header(db, curblock);
        if (firsttime) {
            guppi_read_obs_params(ptr, &gp, &pf);
            firsttime = 0;
        } else {
            guppi_read_subint_params(ptr, &gp, &pf);
        }

        /* Find out what mode this data is in */
        mode = psrfits_obs_mode(pf.hdr.obs_mode);

        /* Check if we got both packet 0 and a valid observation
         * start time.  If so, flag writing to start.
         */
        if (got_packet_0==0 && gp.packetindex==0 && gp.stt_valid==1) {
            got_packet_0 = 1;
            guppi_read_obs_params(ptr, &gp, &pf);
            guppi_update_ds_params(&pf);
            memset(pc, 0, sizeof(pc));
            n_polyco_written=0;
        }

        /* If actual observation has started, write the data */
        if (got_packet_0) { 

            /* Note writing status */
            guppi_status_lock_safe(&st);
            hputs(st.buf, STATUS_KEY, "writing");
            guppi_status_unlock_safe(&st);
            
            /* Get the pointer to the current data */
            if (mode==FOLD_MODE) {
                fb.nchan = pf.hdr.nchan;
                fb.npol = pf.hdr.npol;
                fb.nbin = pf.hdr.nbin;
                fb.data = (float *)guppi_databuf_data(db, curblock);
                fb.count = (unsigned *)(guppi_databuf_data(db, curblock)
                        + foldbuf_data_size(&fb));

                /* Resize the output buffer through a temporary so a
                 * failed realloc neither leaks the old buffer nor hands
                 * a NULL data pointer to the processing steps below.
                 * A zero-byte request is skipped because realloc(p, 0)
                 * may free p and return NULL. */
                size_t fold_bytes = sizeof(float) * pf.hdr.nbin * 
                        pf.hdr.nchan * pf.hdr.npol;
                if (fold_bytes > 0) {
                    float *grown = realloc(fold_output_array, fold_bytes);
                    if (grown==NULL) {
                        guppi_error("guppi_psrfits_thread",
                                "Error allocating fold output array.");
                        break;
                    }
                    fold_output_array = grown;
                }
                pf.sub.data = (unsigned char *)fold_output_array;
                pf.fold.pc = (struct polyco *)(guppi_databuf_data(db,curblock)
                        + foldbuf_data_size(&fb) + foldbuf_count_size(&fb));
            } else 
                pf.sub.data = (unsigned char *)guppi_databuf_data(db, curblock);
            
            /* Set the DC and Nyquist channels explicitly to zero */
            /* because of the "FFT Problem" that splits DC power  */
            /* into those two bins.                               */
            zero_end_chans(&pf);

            /* Output only Stokes I (in place) */
            if (pf.hdr.onlyI && pf.hdr.npol==4)
                get_stokes_I(&pf);

            /* Downsample in frequency (in place) */
            if (pf.hdr.ds_freq_fact > 1)
                downsample_freq(&pf);

            /* Downsample in time (in place) */
            if (pf.hdr.ds_time_fact > 1)
                downsample_time(&pf);

            /* Folded data needs a transpose */
            if (mode==FOLD_MODE)
                normalize_transpose_folds(fold_output_array, &fb);

            /* Write the data */
            int last_filenum = pf.filenum;
            psrfits_write_subint(&pf);

            /* Any actions that need to be taken when a new file
             * is created.
             */
            if (pf.filenum!=last_filenum) {
                /* No polycos yet written to the new file */
                n_polyco_written=0;
            }

            /* Write the polycos if needed */
            /* NOTE(review): pf.fold.pc is only assigned in the fold-mode
             * branch above, yet this loop also runs in search mode;
             * presumably n_polyco_sets is 0 there -- confirm. */
            int write_pc=0, i, j;
            for (i=0; i<pf.fold.n_polyco_sets; i++) {
                if (pf.fold.pc[i].used==0) continue; 
                int new_pc=1;
                for (j=0; j<n_polyco_written; j++) {
                    if (polycos_differ(&pf.fold.pc[i], &pc[j])==0) {
                        new_pc=0;
                        break;
                    }
                }
                if (new_pc || n_polyco_written==0) {
                    if (n_polyco_written < max_pc) {
                        pc[n_polyco_written] = pf.fold.pc[i];
                        n_polyco_written++;
                    } else {
                        /* Never write past the end of pc[]: the set is
                         * still written to the file, it just cannot be
                         * remembered for duplicate detection. */
                        guppi_error("guppi_psrfits_thread",
                                "Polyco cache full; set will not be remembered.");
                    }
                    write_pc=1;
                } else {
                    pf.fold.pc[i].used = 0; // Already have this one
                }
            }
            if (write_pc) 
                psrfits_write_polycos(&pf, pf.fold.pc, pf.fold.n_polyco_sets);

            /* Is the scan complete? */
            if ((pf.hdr.scanlen > 0.0) && 
                (pf.T > pf.hdr.scanlen)) scan_finished = 1;
            
            /* For debugging... */
            if (gp.drop_frac > 0.0) {
               printf("Block %d dropped %.3g%% of the packets\n", 
                      pf.tot_rows, gp.drop_frac*100.0);
            }

        }

        /* Mark as free */
        guppi_databuf_set_free(db, curblock);
        
        /* Go to next block */
        curblock = (curblock + 1) % db->n_block;
        
        /* Check for cancel */
        pthread_testcancel();
        
    } while (run && !scan_finished);
    
    /* Cleanup */
    
    free(fold_output_array);

    /* pthread_exit() runs every registered cleanup handler, so the pops
     * below are never reached; they are required only because
     * pthread_cleanup_push/pop must be lexically paired. */
    pthread_exit(NULL);
    
    pthread_cleanup_pop(0); /* Closes guppi_databuf_detach */
    pthread_cleanup_pop(0); /* Closes psrfits_close */
    pthread_cleanup_pop(0); /* Closes guppi_free_psrfits */
    pthread_cleanup_pop(0); /* Closes set_exit_status */
    pthread_cleanup_pop(0); /* Closes guppi_status_detach */
    pthread_cleanup_pop(0); /* Closes set_finished */
}
/* Command-line entry point.
 *
 * Opens the input PSRFITS file(s) named on the command line, optionally
 * reads user channel weights, and then processes the data one row at a
 * time: read the overlap from the next row (or pad with channel
 * averages when the data runs out), unpack 2/4-bit samples, subband or
 * copy into the output buffer, optionally reduce to Stokes I and
 * downsample in time, rescale to bytes, repack, and write the row to
 * the output file.
 *
 * With no arguments the usage text is printed and the program exits
 * with status 0.  Failure to open the input, or a weights file whose
 * length does not match the input channel count, exits with status 1.
 */
int main(int argc, char *argv[]) {
    Cmdline *cmd;
    struct psrfits pfi, pfo; // input and output
    struct subband_info si;
    int stat=0, padding=0, userN=0;

    // Call usage() if we have no command line arguments
    if (argc == 1) {
        Program = argv[0];
        usage();
        exit(0);
    }

    // Parse the command line using the excellent program Clig
    cmd = parseCmdline(argc, argv);

    // Open the input PSRFITs files
    psrfits_set_files(&pfi, cmd->argc, cmd->argv);

    // Use the dynamic filename allocation
    if (pfi.numfiles==0) pfi.filenum = cmd->startfile;
    pfi.tot_rows = pfi.N = pfi.T = pfi.status = 0;
    int rv = psrfits_open(&pfi);
    if (rv) { fits_report_error(stderr, rv); exit(1); }

    // Read the user weights if requested
    si.userwgts = NULL;
    if (cmd->wgtsfileP) {
        read_weights(cmd->wgtsfile, &userN, &si.userwgts);
        if (userN != pfi.hdr.nchan) {
            // Fatal: report on stderr and exit non-zero so that callers
            // can detect the failure.
            fprintf(stderr,
                    "Error!:  Input data has %d channels, but '%s' contains only %d weights!\n",
                    pfi.hdr.nchan, cmd->wgtsfile, userN);
            exit(1);
        }
        printf("Overriding input channel weights with those in '%s'\n",
               cmd->wgtsfile);
    }

    // Initialize the subbanding
    // (including reading the first row of data and
    //  putting it in si->fbuffer)
    init_subbanding(&pfi, &pfo, &si, cmd);

    // NOTE(review): unbounded copy; the capacity of pfo.basefilename is
    // not visible here -- confirm it can hold any user-supplied name.
    if (cmd->outputbasenameP)
      strcpy(pfo.basefilename, cmd->outputbasename);

    // Loop through the data
    do {
        // Put the overlapping parts from the next block into si->buffer
        float *ptr = pfi.sub.fdata + si.buflen * si.bufwid;
        if (padding==0)
            stat = psrfits_read_part_DATA(&pfi, si.max_overlap, si.numunsigned, ptr);
        if (stat || padding) { // Need to use padding since we ran out of data
            printf("Adding a missing row (#%d) of padding to the subbands.\n",
                   pfi.tot_rows);
            // Now fill the last part of si->fbuffer with the chan_avgs so that
            // it acts like a correctly read block (or row)
            fill_chans_with_avgs(si.max_overlap, si.bufwid,
                                 ptr, si.chan_avgs);
        }
        //print_raw_chan_stats(pfi.sub.data, pfi.hdr.nsblk,
        //                     pfi.hdr.nchan, pfi.hdr.npol);

        // if the input data isn't 8 bit, unpack:
        if (pfi.hdr.nbits == 2)
            pf_unpack_2bit_to_8bit(&pfi, si.numunsigned);
        else if (pfi.hdr.nbits == 4)
            pf_unpack_4bit_to_8bit(&pfi, si.numunsigned);

        if ((pfo.hdr.ds_time_fact == 1) &&
            (pfo.hdr.ds_freq_fact == 1)) {
            // No subbanding is needed, so just copy the float buffer
            // This is useful if we are just changing the number of bits
            // Could do it without a copy by simply exchanging pointers
            // to the fdata buffers in pfo and pfi...
            memcpy(pfo.sub.fdata, pfi.sub.fdata,
                   pfi.hdr.nsblk * pfi.hdr.npol * pfi.hdr.nchan * sizeof(float));
        } else {
            // Now create the subbanded row in the output buffer
            make_subbands(&pfi, &si);
        }

        // Output only Stokes I (in place via floats)
        if (pfo.hdr.onlyI && pfo.hdr.npol==4)
            get_stokes_I(&pfo);

        // Downsample in time (in place via floats)
        if (pfo.hdr.ds_time_fact > 1)
            downsample_time(&pfo);

        // Compute new scales and offsets so that we can pack
        // into 8-bits reliably
        if (pfo.rownum == 1)
            new_scales_and_offsets(&pfo, si.numunsigned, cmd);

        // Convert the floats back to bytes in the output array
        un_scale_and_offset_data(&pfo, si.numunsigned);
        //print_raw_chan_stats(pfo.sub.data, pfo.hdr.nsblk / pfo.hdr.ds_time_fact,
        //                     pfo.hdr.nchan / pfo.hdr.ds_freq_fact, pfo.hdr.npol);
        
        // pack into 2 or 4 bits if needed
        if (pfo.hdr.nbits == 2)
            pf_pack_8bit_to_2bit(&pfo, si.numunsigned);
        else if (pfo.hdr.nbits == 4)
            pf_pack_8bit_to_4bit(&pfo, si.numunsigned);

        // Write the new row to the output file
        pfo.sub.offs = (pfo.tot_rows+0.5) * pfo.sub.tsubint;
        psrfits_write_subint(&pfo);

        // Break out of the loop here if stat is set
        // (the padded final row has already been written above)
        if (stat) break;

        // shift the last part of the current row into the "last-row"
        // part of the data buffer
        memcpy(si.fbuffer, si.fbuffer + si.buflen * si.bufwid,
               si.max_overlap * si.bufwid * sizeof(float));

        // Read the next row (or padding)
        padding = get_current_row(&pfi, &si);

        // Set the new weights properly
        new_weights(&pfi, &pfo);
        
    } while (pfi.status == 0);
    
    print_clips(&pfo);
    rv = psrfits_close(&pfi);
    if (rv>100) { fits_report_error(stderr, rv); }
    rv = psrfits_close(&pfo);
    if (rv>100) { fits_report_error(stderr, rv); }
    exit(0);
}