// Helper: check that the structured dotprod_cccf object yields the same
// result as the direct dotprod_cccf_run() computation on random input.
//  _n  :   length of the coefficient and sample vectors
void runtest_dotprod_cccf(unsigned int _n)
{
    float tol = 1e-3;
    float complex coeffs[_n];
    float complex samples[_n];

    // populate both vectors with random complex values
    unsigned int k = 0;
    while (k < _n) {
        coeffs[k]  = randnf() + randnf() * _Complex_I;
        samples[k] = randnf() + randnf() * _Complex_I;
        k++;
    }

    // reference value from the direct (non-object) routine
    float complex y_test;
    dotprod_cccf_run(coeffs, samples, _n, &y_test);

    // value under test from the structured object
    float complex y;
    dotprod_cccf q = dotprod_cccf_create(coeffs, _n);
    dotprod_cccf_execute(q, samples, &y);
    dotprod_cccf_destroy(q);

    // optionally report both values
    if (liquid_autotest_verbose) {
        printf("  dotprod-cccf-%-4u : %12.8f + j%12.8f (expected %12.8f + j%12.8f)\n",
               _n,
               crealf(y),      cimagf(y),
               crealf(y_test), cimagf(y_test));
    }

    // real and imaginary components are validated independently
    CONTEND_DELTA(crealf(y), crealf(y_test), tol);
    CONTEND_DELTA(cimagf(y), cimagf(y_test), tol);
}
// Re-create the structured dotprod object: the existing object is destroyed
// and a new one is created from the supplied coefficients.
//  _q  :   existing dotprod object (destroyed by this call)
//  _h  :   coefficients handed to dotprod_cccf_create()
//  _n  :   number of coefficients
// returns the newly created dotprod object
dotprod_cccf dotprod_cccf_recreate(dotprod_cccf    _q,
                                   float complex * _h,
                                   unsigned int    _n)
{
    // no state is carried over from the old object
    dotprod_cccf_destroy(_q);

    dotprod_cccf q_new = dotprod_cccf_create(_h, _n);
    return q_new;
}
//
// AUTOTEST: complex dot product of two fixed 16-point vectors, checked
//           against a pre-computed value for dotprod_cccf_run(),
//           dotprod_cccf_run4(), and the structured dotprod_cccf object
//
void autotest_dotprod_cccf_rand16()
{
    // comparison tolerance and pre-computed expected result
    float tol = 1e-3f;
    float complex test = -0.604285042605890 - 12.390925785344704 * _Complex_I;

    // coefficients
    float complex h[16] = {
      0.17702709 +   1.38978455*_Complex_I,  0.91294148 +   0.39217381*_Complex_I,
     -0.80607338 +   0.76477512*_Complex_I,  0.05099755 +  -0.87350051*_Complex_I,
      0.44513826 +  -0.49490569*_Complex_I,  0.14754967 +   2.04349962*_Complex_I,
      1.07246623 +   1.08146290*_Complex_I, -1.14028088 +   1.83380899*_Complex_I,
      0.38105361 +  -0.45591846*_Complex_I,  0.32605401 +   0.34440081*_Complex_I,
     -0.05477144 +   0.60832595*_Complex_I,  1.81667523 +  -1.12238075*_Complex_I,
     -0.87190497 +   1.10743858*_Complex_I,  1.30921403 +   1.24438643*_Complex_I,
      0.55524695 +  -1.94931519*_Complex_I, -0.87191170 +   0.91693119*_Complex_I,
    };

    // input samples
    float complex x[16] = {
     -2.19591953 +  -0.93229692*_Complex_I,  0.17150376 +   0.56165114*_Complex_I,
      1.58354529 +  -0.50696037*_Complex_I,  1.40929619 +   0.87868803*_Complex_I,
     -0.75505072 +  -0.30867372*_Complex_I, -0.09821367 +  -0.73949106*_Complex_I,
      0.03785571 +   0.72763665*_Complex_I, -1.20262636 +  -0.88838102*_Complex_I,
      0.23323685 +   0.12456235*_Complex_I,  0.34593736 +   0.02529594*_Complex_I,
      0.33669564 +   0.39064649*_Complex_I, -2.45003867 +  -0.54862205*_Complex_I,
     -2.64870707 +   2.33444473*_Complex_I, -0.92284477 +  -2.45121397*_Complex_I,
      0.24852918 +  -0.62409860*_Complex_I, -0.87039907 +   0.90921212*_Complex_I,
    };

    // output of each routine under test
    float complex y;

    // direct computation
    dotprod_cccf_run(h, x, 16, &y);
    CONTEND_DELTA( crealf(y), crealf(test), tol);
    CONTEND_DELTA( cimagf(y), cimagf(test), tol);

    // direct computation, 'run4' variant
    dotprod_cccf_run4(h, x, 16, &y);
    CONTEND_DELTA( crealf(y), crealf(test), tol);
    CONTEND_DELTA( cimagf(y), cimagf(test), tol);

    // structured object
    dotprod_cccf dp = dotprod_cccf_create(h, 16);
    dotprod_cccf_execute(dp, x, &y);
    CONTEND_DELTA( crealf(y), crealf(test), tol);
    CONTEND_DELTA( cimagf(y), cimagf(test), tol);
    dotprod_cccf_destroy(dp);
}
// Example program: generate OFDM/OQAM frames with the ofdmoqam synthesizer,
// apply a carrier phase/frequency offset, run a manually constructed analysis
// filterbank (window buffers + dot products + DFT) with offset compensation,
// and export the channelized output to OUTPUT_FILENAME as a script using
// MATLAB/Octave-style syntax.
//
// returns 0 on success, 1 if the output file cannot be opened
int main() {
    // options
    unsigned int num_channels=64;   // must be even number
    unsigned int num_symbols=16;    // number of symbols
    unsigned int m=3;               // filter delay (symbols)
    float beta = 0.9f;              // filter excess bandwidth factor
    float phi = 0.0f;               // carrier phase offset
    float dphi = 0.04f;             // carrier frequency offset

    // number of frames (compensate for filter delay)
    unsigned int num_frames = num_symbols + 2*m;
    unsigned int num_samples = num_channels * num_frames;
    unsigned int i;
    unsigned int j;

    // create filter prototype
    unsigned int h_len = 2*num_channels*m + 1;
    float h[h_len];
    float complex gc[h_len];
    liquid_firdes_rkaiser(num_channels, m, beta, 0.0f, h);

    // receive filter: index-reversed prototype mixed by the carrier
    // frequency offset (only the first g_len entries of gc are set and used)
    unsigned int g_len = 2*num_channels*m;
    for (i=0; i<g_len; i++)
        gc[i] = h[g_len-i-1] * cexpf(_Complex_I*dphi*i);

    // data arrays
    float complex s[num_channels];                  // input symbols
    float complex y[num_samples];                   // time-domain samples
    float complex Y0[num_frames][num_channels];     // channelized output
    float complex Y1[num_frames][num_channels];     // channelized output

    // Y1 is never computed in this example but is still exported below;
    // clear it so the export does not read indeterminate values
    for (i=0; i<num_frames; i++) {
        for (j=0; j<num_channels; j++)
            Y1[i][j] = 0.0f;
    }

    // create ofdm/oqam generator object and generate data
    ofdmoqam qs = ofdmoqam_create(num_channels, m, beta, 0.0f, LIQUID_SYNTHESIZER, 0);
    for (i=0; i<num_frames; i++) {
        for (j=0; j<num_channels; j++) {
            if (i<num_symbols) {
#if 0
                // QPSK on all subcarriers
                s[j] = (rand() % 2 ? 1.0f : -1.0f) +
                       (rand() % 2 ? 1.0f : -1.0f) * _Complex_I;
                s[j] *= 1.0f / sqrtf(2.0f);
#else
                // BPSK on even subcarriers
                s[j] =  rand() % 2 ? 1.0f : -1.0f;
                s[j] *= (j%2)==0 ? 1.0f : 0.0f;
#endif
            } else {
                // trailing frames carry no data (flush filter delay)
                s[j] = 0.0f;
            }
        }

        // run synthesizer
        ofdmoqam_execute(qs, s, &y[i*num_channels]);
    }
    ofdmoqam_destroy(qs);

    // channel: apply carrier phase and frequency offset
    for (i=0; i<num_samples; i++)
        y[i] *= cexpf(_Complex_I*(phi + dphi*i));


    //
    // analysis filterbank (receiver)
    //

    // create filterbank manually
    dotprod_cccf dp[num_channels];  // vector dot products
    windowcf w[num_channels];       // window buffers

#if DEBUG
    // print coefficients
    printf("h_prototype:\n");
    for (i=0; i<h_len; i++)
        printf("  h[%3u] = %12.8f\n", i, h[i]);
#endif

    // create objects
    unsigned int gc_sub_len = 2*m;
    float complex gc_sub[gc_sub_len];
    for (i=0; i<num_channels; i++) {
        // sub-sample prototype filter, loading coefficients in
        // reverse order
#if 0
        for (j=0; j<gc_sub_len; j++)
            gc_sub[j] = h[j*num_channels+i];
#else
        for (j=0; j<gc_sub_len; j++)
            gc_sub[gc_sub_len-j-1] = gc[j*num_channels+i];
#endif

        // create window buffer and dotprod objects
        dp[i] = dotprod_cccf_create(gc_sub, gc_sub_len);
        w[i]  = windowcf_create(gc_sub_len);

#if DEBUG
        printf("gc_sub[%u] : \n", i);
        for (j=0; j<gc_sub_len; j++)
            printf("  g[%3u] = %12.8f + %12.8f\n", j, crealf(gc_sub[j]), cimagf(gc_sub[j]));
#endif
    }

    // generate DFT object
    float complex x[num_channels];  // time-domain buffer
    float complex X[num_channels];  // freq-domain buffer
#if 0
    fftplan fft = fft_create_plan(num_channels, X, x, FFT_REVERSE, 0);
#else
    fftplan fft = fft_create_plan(num_channels, X, x, FFT_FORWARD, 0);
#endif

    // 
    // run analysis filter bank
    //
#if 0
    unsigned int filter_index = 0;
#else
    unsigned int filter_index = num_channels-1;
#endif
    float complex y_hat;    // input sample
    float complex * r;      // read pointer
    for (i=0; i<num_frames; i++) {

        // load buffers
        for (j=0; j<num_channels; j++) {
            // grab sample
            y_hat = y[i*num_channels + j];

            // push sample into buffer at filter index
            windowcf_push(w[filter_index], y_hat);

            // decrement filter index (modulo num_channels)
            filter_index = (filter_index + num_channels - 1) % num_channels;
            //filter_index = (filter_index + 1) % num_channels;
        }

        // execute filter outputs, reversing order of output (not
        // sure why this is necessary)
        for (j=0; j<num_channels; j++) {
            windowcf_read(w[j], &r);
            dotprod_cccf_execute(dp[j], r, &X[num_channels-j-1]);
        }

#if 1
        // compensate for carrier frequency offset (before transform);
        // the rotation depends only on the frame index, so compute it once
        float complex derot = cexpf(-_Complex_I*(dphi*i*num_channels));
        for (j=0; j<num_channels; j++)
            X[j] *= derot;
#endif

        // execute DFT, store result in buffer 'x'
        fft_execute(fft);

#if 0
        // compensate for carrier frequency offset (after transform)
        for (j=0; j<num_channels; j++) {
            x[j] *= cexpf(-_Complex_I*(dphi*i*num_channels));
        }
#endif

        // move to output array
        for (j=0; j<num_channels; j++)
            Y0[i][j] = x[j];
    }


    // destroy objects
    for (i=0; i<num_channels; i++) {
        dotprod_cccf_destroy(dp[i]);
        windowcf_destroy(w[i]);
    }
    fft_destroy_plan(fft);

#if 0
    // print filterbank channelizer
    printf("\n");
    printf("filterbank channelizer:\n");
    for (i=0; i<num_symbols; i++) {
        printf("%3u: ", i);
        for (j=0; j<num_channels; j++) {
            printf("  %8.5f+j%8.5f, ", crealf(Y0[i][j]), cimagf(Y0[i][j]));
        }
        printf("\n");
    }
#endif

    // 
    // export data
    //
    FILE*fid = fopen(OUTPUT_FILENAME,"w");
    if (fid == NULL) {
        // all processing objects have already been destroyed above,
        // so there is nothing further to release here
        fprintf(stderr,"error: could not open '%s' for writing\n", OUTPUT_FILENAME);
        return 1;
    }
    fprintf(fid,"%% %s: auto-generated file\n\n", OUTPUT_FILENAME);
    fprintf(fid,"clear all;\nclose all;\n\n");
    fprintf(fid,"num_channels=%u;\n", num_channels);
    fprintf(fid,"num_symbols=%u;\n", num_symbols);
    fprintf(fid,"num_frames = %u;\n", num_frames);
    fprintf(fid,"num_samples = num_frames*num_channels;\n");

    fprintf(fid,"y = zeros(1,%u);\n",  num_samples);
    fprintf(fid,"Y0 = zeros(%u,%u);\n", num_frames, num_channels);
    fprintf(fid,"Y1 = zeros(%u,%u);\n", num_frames, num_channels);
    
    // script indices are 1-based, hence i+1 / j+1
    for (i=0; i<num_frames; i++) {
        for (j=0; j<num_channels; j++) {
            fprintf(fid,"Y0(%4u,%4u) = %12.4e + j*%12.4e;\n", i+1, j+1, crealf(Y0[i][j]), cimagf(Y0[i][j]));
            fprintf(fid,"Y1(%4u,%4u) = %12.4e + j*%12.4e;\n", i+1, j+1, crealf(Y1[i][j]), cimagf(Y1[i][j]));
        }
    }

    // plot BPSK results
    fprintf(fid,"figure;\n");
    fprintf(fid,"plot(Y0(:,1:2:end),'x');\n");
    fprintf(fid,"axis([-1 1 -1 1]*1.2*sqrt(num_channels));\n");
    fprintf(fid,"axis square;\n");
    fprintf(fid,"grid on;\n");

    fclose(fid);
    printf("results written to '%s'\n", OUTPUT_FILENAME);

    printf("done.\n");
    return 0;
}
//
// AUTOTEST: structured dot product at lengths 32, 33, 34, and 35, each
//           taken over the leading entries of the same fixed vectors and
//           compared against a pre-computed value
//
// NOTE(review): CONTEND_DELTA is handed complex values here, whereas the
// other dotprod_cccf tests compare real and imaginary parts separately;
// confirm the macro validates the imaginary component as well.
//
void autotest_dotprod_cccf_struct_lengths()
{
    float tol = 2e-6;

    // coefficients
    float complex h[35] = {
      1.11555653 +   2.30658043*_Complex_I, -0.36133676 +  -0.10917327*_Complex_I,
      0.17714505 +  -2.14631440*_Complex_I,  2.20424609 +   0.59063608*_Complex_I,
     -0.44699194 +   0.23369318*_Complex_I,  0.60613931 +   0.21868288*_Complex_I,
     -1.18746289 +  -0.52159563*_Complex_I, -0.46277775 +   0.75010157*_Complex_I,
      0.93796307 +   0.28608151*_Complex_I, -2.18699829 +   0.38029319*_Complex_I,
      0.16145611 +   0.18343353*_Complex_I, -0.62653631 +  -1.79037656*_Complex_I,
     -0.67042462 +   0.11044084*_Complex_I,  0.70333438 +   1.78729174*_Complex_I,
     -0.32923580 +   0.78514690*_Complex_I,  0.27534332 +  -0.56377431*_Complex_I,
      0.41492559 +   1.37176526*_Complex_I,  3.25368958 +   2.70495218*_Complex_I,
      1.63002035 +  -0.14193750*_Complex_I,  2.22057186 +   0.55056461*_Complex_I,
      1.40896777 +   0.80722903*_Complex_I, -0.22334033 +  -0.14227395*_Complex_I,
     -1.48631186 +   0.53610531*_Complex_I, -1.91632185 +   0.88755083*_Complex_I,
     -0.52054895 +  -0.35572001*_Complex_I, -1.56515607 +  -0.41448794*_Complex_I,
     -0.91107117 +   0.17059659*_Complex_I, -0.77007659 +   2.73381816*_Complex_I,
     -0.46645585 +   0.38994666*_Complex_I,  0.80317663 +  -0.41756968*_Complex_I,
      0.26992512 +   0.41828145*_Complex_I, -0.72456446 +   1.25002030*_Complex_I,
      1.19573306 +   0.98449546*_Complex_I,  1.42491943 +  -0.55426305*_Complex_I,
      1.08243614 +   0.35774368*_Complex_I, };

    // input samples
    float complex x[35] = {
     -0.82466736 +  -1.39329228*_Complex_I, -1.46176052 +  -1.96218827*_Complex_I,
     -1.28388174 +  -0.07152934*_Complex_I, -0.51910014 +  -0.37915971*_Complex_I,
     -0.65964708 +  -0.98417534*_Complex_I, -1.40213479 +  -0.82198463*_Complex_I,
      0.86051446 +   0.97926463*_Complex_I,  0.26257342 +   0.76586696*_Complex_I,
      0.72174183 +  -1.89884636*_Complex_I, -0.26018863 +   1.06920599*_Complex_I,
      0.57949117 +  -0.77431546*_Complex_I,  0.84635184 +  -0.81123009*_Complex_I,
     -1.12637629 +  -0.42027412*_Complex_I, -1.04214881 +   0.90519721*_Complex_I,
      0.54458433 +  -1.03487314*_Complex_I, -0.17847893 +   2.20358978*_Complex_I,
      0.19642532 +  -0.07449796*_Complex_I, -1.84958229 +   0.13218920*_Complex_I,
     -1.49042886 +   0.81610408*_Complex_I, -0.27466940 +  -1.48438409*_Complex_I,
      0.29239375 +   0.72443343*_Complex_I, -1.20243456 +  -2.77032750*_Complex_I,
     -0.41784260 +   0.77455254*_Complex_I,  0.37737465 +  -0.52426993*_Complex_I,
     -1.25500377 +   1.76270122*_Complex_I,  1.55976056 +  -1.18189171*_Complex_I,
     -0.05111343 +  -1.18849396*_Complex_I, -1.92966664 +   0.66504899*_Complex_I,
     -2.82387897 +   1.41128242*_Complex_I, -1.48171326 +  -0.03347470*_Complex_I,
      0.38047273 +  -1.40969799*_Complex_I,  1.71995272 +   0.00298203*_Complex_I,
      0.56040910 +  -0.12713027*_Complex_I, -0.46653022 +  -0.65450499*_Complex_I,
      0.15515755 +   1.58944030*_Complex_I, };

    // pre-computed expected results, one per tested length
    float complex v32 = -11.5100903519506 - 15.3575526884014*_Complex_I;
    float complex v33 = -10.7148314918614 - 14.9578463360225*_Complex_I;
    float complex v34 = -11.7423673921916 - 15.6318827515320*_Complex_I;
    float complex v35 = -12.1430314741466 - 13.8559085000689*_Complex_I;

    float complex y;    // result of the object under test
    dotprod_cccf q;     // object handle, re-used for every length

    // length 32
    q = dotprod_cccf_create(h, 32);
    dotprod_cccf_execute(q, x, &y);
    CONTEND_DELTA(y, v32, tol);
    dotprod_cccf_destroy(q);
    if (liquid_autotest_verbose) {
        printf("  dotprod-cccf-32 : %12.8f + j%12.8f (expected %12.8f + j%12.8f)\n",
               crealf(y),   cimagf(y),
               crealf(v32), cimagf(v32));
    }

    // length 33
    q = dotprod_cccf_create(h, 33);
    dotprod_cccf_execute(q, x, &y);
    CONTEND_DELTA(y, v33, tol);
    dotprod_cccf_destroy(q);
    if (liquid_autotest_verbose) {
        printf("  dotprod-cccf-33 : %12.8f + j%12.8f (expected %12.8f + j%12.8f)\n",
               crealf(y),   cimagf(y),
               crealf(v33), cimagf(v33));
    }

    // length 34
    q = dotprod_cccf_create(h, 34);
    dotprod_cccf_execute(q, x, &y);
    CONTEND_DELTA(y, v34, tol);
    dotprod_cccf_destroy(q);
    if (liquid_autotest_verbose) {
        printf("  dotprod-cccf-34 : %12.8f + j%12.8f (expected %12.8f + j%12.8f)\n",
               crealf(y),   cimagf(y),
               crealf(v34), cimagf(v34));
    }

    // length 35
    q = dotprod_cccf_create(h, 35);
    dotprod_cccf_execute(q, x, &y);
    CONTEND_DELTA(y, v35, tol);
    dotprod_cccf_destroy(q);
    if (liquid_autotest_verbose) {
        printf("  dotprod-cccf-35 : %12.8f + j%12.8f (expected %12.8f + j%12.8f)\n",
               crealf(y),   cimagf(y),
               crealf(v35), cimagf(v35));
    }
}