/*

/*

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                                                          %%
%%                                              Wroc{\l}aw, September 2022  %%
%%                                                                          %%
%%                                                                          %%
%%                                                                          %%
%%                  LINEAR-TIME ALGORITHM FOR COMPUTING                     %%
%%                 THE BERNSTEIN-B\'{E}ZIER COEFFICIENTS                    %%
%%                      OF B-SPLINE BASIS FUNCTIONS                         %%
%%                       (test program for curves)                          %%
%%                                                                          %%
%%                           GNU C Compiler 11.2.0                          %%
%%                                                                          %%
%%                      Filip Chudy, Pawe\{l} Wo\'{z}ny                     %%
%%                                                                          %%
%%                                                                          %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

*/

*/

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

#define NMULT 50 // N = 50 * n


// max n = 50
// max m = 15
// max M = 100
// max N = 2501
// max d = 3


float knots[610];                     // n + 2m
float samples[2510];                   // N
float points[110][70][3];              // M * (n + m) * d

float coeffs[510][90][20];         // n * (n + 2m) * (m + 1) 
float points_work[20][70][3];      // m * (n + m) * d 

float h[50];                           // m
float post_bezier[90];                 // n + 2m 
float splines[20][90];              // m * (n + 2m)

float result_points[110][2510][3];     // M * N * d
float result_points_new[110][2510][3]; // M * N * d
float result_points_spl[110][2510][3]; // M * N * d





float float_rand( float min, float max )
{
    float scale = rand() / (float) RAND_MAX; // [0, 1.0]
    return min + scale * (max - min);      // [min, max]
}

int generate_knots_and_samples(int n, int m)
{
    for(int i = 0; i <= m; i++) knots[i] = 1.0;
    samples[0] = knots[m];
    int s = 1;
    for(int i = 1; i <= n; i++) 
    {
        knots[m + i] = knots[m + i - 1] + float_rand(0.02, 1.0);
        float step = knots[m + i] - knots[m + i - 1];
        for(int q = 1; q < NMULT; q++)
        {
            samples[s] = knots[m + i - 1] + step * q / NMULT;
            s++;
        }
        samples[s] = knots[m + i];
        s++;
    }
    for(int i = 1; i <= m; i++) knots[m + n + i] = knots[m + n];
    return s;
}

void generate_points(int n, int m, int d, int total_sets)
{
    for(int set_no = 0; set_no < total_sets; set_no++)
    {
        for(int i = 0; i < n + m + 1; i++)
        {
            for(int dim_c = 0; dim_c < d; dim_c++) 
            {
                points[set_no][i][dim_c] = float_rand(-1.0, 1.0);
            }
        }
    }
}

void zero_coeffs(int n, int m)
{
    for(int j = 0; j < n; j++)
    {
        for(int i = 0; i < n + m; i++)
        {
            for(int k = 0; k <= m; k++) coeffs[j][i][k] = 0.0;
        }
    }
}











































// *************
// NEW ALGORITHM
// *************

void compute_new_coeffs(int n, int m)
{
    float v, c1, c2, c3;
    for(int j = 0; j < n; j++)
    {
        float numerator = pow(knots[j + 1 + m] - knots[j + m], m - 1);
        coeffs[j][j + m][m] = numerator;
        for(int k = 2; k <= m; k++)
            coeffs[j][j + m][m] /= knots[j + k + m] - knots[j + m];
        coeffs[j][j - m + m][0] = numerator;
        for(int k = 2; k <= m; k++)
            coeffs[j][j - m + m][0] /= knots[j + 1 + m] - knots[j + 1 - k + m];
    }
    for(int i = n - 2; i >= n - m; i--)
    {
        c1 = (knots[n - 1 + m] - knots[i + m]) / (knots[n + m] - knots[i + m]);
        c2 = (knots[n + m] - knots[n - 1 + m]) 
            / (knots[n + m] - knots[i + 1 + m]);
        for(int k = m - 1; k >= 0; k--)
            coeffs[n - 1][i + m][k] = c1 * coeffs[n - 1][i + m][k + 1] 
                + c2 * coeffs[n - 1][i + 1 + m][k + 1];
    }
    for(int j = n - 2; j >= 0; j--)
    {
        for(int i = j - 1; i >= j - m + 1; i--)
        {
            coeffs[j][i + m][m] = coeffs[j + 1][i + m][0];
            v = (knots[m + i + 1 + m] - knots[i + m]) 
                / (knots[m + i + 2 + m] - knots[i + 1 + m]);
            float denom = (knots[j + 1 + m] - knots[i + m]);
            c1 = (knots[j + m] - knots[i + m]) / denom;
            c2 = (knots[j + 1 + m] - knots[m + i + 2 + m]) / denom; 
            c3 = (knots[m + i + 2 + m] - knots[j + m]) / denom;
            for(int k = m - 1; k >= 0; k--)
            {
                coeffs[j][i + m][k] = c1 * coeffs[j][i + m][k + 1] 
                    + v * (c2 * coeffs[j][i + 1 + m][k] 
                            + c3 * coeffs[j][i + 1 + m][k + 1]);
            }
        }
    }
}

void compute_h(unsigned int n, float t)
{
    float u = 1.0 - t;
    unsigned int n1 = n + 1;

    h[0] = 1.0;
    if(t <= 0.5)
    {
        u = t / u;
        for(int k = 1; k <= n; k++)
        {
            h[k] = h[k - 1] * u * (n1 - k);
            h[k] = h[k] / (k + h[k]);
        }
    }
    else
    {
        u = u / t;
        for(int k = 1; k <= n; k++)
        {
            h[k] = h[k - 1] * (n1 - k);
            h[k] = h[k] / (k * u + h[k]);
        }
    }
    return;
}

float eval_bezier_1d(unsigned int n, float t, int j, int i)
{
    float result;
    result = coeffs[j][i + n][0];
    if(t <= 0.5)
    {
        for(int k = 1; k <= n; k++)
        {
            float h1 = 1 - h[k];
            result = h1 * result + h[k] * coeffs[j][i + n][k];
        }
    }
    else
    {
        for(int k = 1; k <= n; k++)
        {
            float h1 = 1 - h[k];
            result = h1 * result + h[k] * coeffs[j][i + n][k];
        }
    }
    return result;
}

void eval_bspline_new(int n, int m, int d, int total_sets, int samples_no)
{
    compute_new_coeffs(n, m);
    int j = 0;
    for(int s = 0; s < samples_no; s++)
    {
        if(samples[s] >= knots[j + 1 + m] && j < n - 1) j++;
        float t = (samples[s] - knots[j + m]) 
            / (knots[j + 1 + m] - knots[j + m]);
        compute_h(m, t);
        for(int i = j - m; i <= j; i++)
        {
            post_bezier[i + m] = eval_bezier_1d(m, t, j, i); 
        }
        for(int set_no = 0; set_no < total_sets; set_no++)
        {
            for(int dim_c = 0; dim_c < d; dim_c++)
            {
                result_points_new[set_no][s][dim_c] = 0.;
            }
            for(int i = j - m; i <= j; i++)
            {
                for(int dim_c = 0; dim_c < d; dim_c++)
                {   
                    result_points_new[set_no][s][dim_c] += 
                        post_bezier[i + m] * points[set_no][i + m][dim_c];
                }
            }
        }
    }
}















// *****************
// BLIND DE BOOR-COX
// *****************

void deBoorCox(int n, int m, int j, float u, int d, int set_no)
{
    for(int i = j - m; i <= j; i++)
    {
        for(int dim_c = 0; dim_c < d; dim_c++)
        {
            points_work[0][i + m][dim_c] = points[set_no][i + m][dim_c];
        }
    }
    for(int k = 1; k <= m; k++)
    {
        for(int i = j - m + k; i <= j; i++)
        {
            float a = (u - knots[i + m]) 
                / (knots[i + 1 + m - k + m] - knots[i + m]);
            for(int dim_c = 0; dim_c < d; dim_c++)
            {
                points_work[k][i + m][dim_c] = a * points_work[k - 1][i + m][dim_c] 
                    + (1.0 - a) * points_work[k - 1][i - 1 + m][dim_c];
            }
        }
    }
    // result stored in points_work[m][j + m][0..d-1]
}

void many_deBoorCox(int n, int m, int d, int total_sets, int samples_no)
{
    for(int set_no = 0; set_no < total_sets; set_no++)
    {
        int j = 0;
        for(int s = 0; s < samples_no; s++)
        {
            if(samples[s] >= knots[j + 1 + m] && j < n - 1) j++;
            deBoorCox(n, m, j, samples[s], d, set_no);
            for(int dim_c = 0; dim_c < d; dim_c++)
            {
                result_points[set_no][s][dim_c] = points_work[m][j + m][dim_c];
            }
        }
    }
    // result stored in result_points[no. of sets][curve sampling][point dim]
}





// *********************
// EVALUATE SPLINES ONCE
// *********************

void evaluate_splines(int n, int m, int j, float u)
{
    splines[0][j - 1 + m] = 0.0;
    splines[0][j + 1 + m] = 0.0;
    splines[0][j + m] = 1.0;
    for(int k = 1; k <= m; k++)
    {
        splines[k][j - k - 1 + m] = 0.0;
        splines[k][j + 1 + m] = 0.0;
        for(int i = j - k; i <= j; i++)
        {
            splines[k][i + m] = 0.0f;
            if(knots[k + i + m] != knots[i + m]) 
                splines[k][i + m] += (u - knots[i + m]) * splines[k - 1][i + m] 
                    / (knots[k + i + m] - knots[i + m]);
            if(knots[k + i + 1 + m] != knots[i + 1 + m])
                splines[k][i + m] += (knots[k + i + 1 + m] - u) * splines[k - 1][i + 1 + m] 
                    / (knots[k + i + 1 + m] - knots[i + 1 + m]);
        }
    }
}

void smart_deBoorCox(int n, int m, int d, int total_sets, int samples_no)
{
    int j = 0;
    for(int s = 0; s < samples_no; s++)
    {
        if(samples[s] >= knots[j + 1 + m] && j < n - 1) j++;
        evaluate_splines(n, m, j, samples[s]);
        for(int set_no = 0; set_no < total_sets; set_no++)
        {
            for(int dim_c = 0; dim_c < d; dim_c++)
            {
                result_points_spl[set_no][s][dim_c] = 0.0;
            }
            for(int i = j - m; i <= j; i++)
            {
                for(int dim_c = 0; dim_c < d; dim_c++)
                {
                    result_points_spl[set_no][s][dim_c] += splines[m][i + m] * points[set_no][i + m][dim_c];
                }
            }
        }
    }
    // result in result_points_spl[no. of sets][curve sampling][point dim]
}


// *************************
// JUST B-SPLINE COMPUTATION
// *************************

void eval_splines(int n, int m, int samples_no)
{
    int j = 0;
    for(int s = 0; s < samples_no; s++)
    {
        if(samples[s] >= knots[j + 1 + m] && j < n - 1) j++;
        evaluate_splines(n, m, j, samples[s]);
    }
}

void eval_splines_new(int n, int m, int samples_no)
{
    compute_new_coeffs(n, m);
    int j = 0;
    for(int s = 0; s < samples_no; s++)
    {
        if(samples[s] >= knots[j + 1 + m] && j < n - 1) j++;
        float t = (samples[s] - knots[j + m]) 
            / (knots[j + 1 + m] - knots[j + m]);
        compute_h(m, t);
        for(int i = j - m; i <= j; i++)
        {
            post_bezier[i + m] = eval_bezier_1d(m, t, j, i); 
        }
    }
}


void do_tests_just_splines()
{
    int total_sets = 1;
    time_t randtime;
    srand((unsigned) time(&randtime));
    clock_t start_t, end_t;
    printf("m,n,N,spline,new\n");
    for(int m = 3; m <= 15; m++)
    {
        for(int n = 10; n <= 50; n += 5)
        {
            int samples_no = generate_knots_and_samples(n, m);
    
            double elapsed_new = 0.0, elapsed_sdB = 0.0;
            for(int q = 0; q < 100; q++)
            {
                samples_no = generate_knots_and_samples(n, m);
                zero_coeffs(n, m);

                start_t = clock();
                eval_splines_new(n, m, samples_no);
                end_t = clock();
                elapsed_new += (double)(end_t - start_t) / CLOCKS_PER_SEC;

                start_t = clock();
                eval_splines(n, m, samples_no);
                end_t = clock();
                elapsed_sdB += (double)(end_t - start_t) / CLOCKS_PER_SEC;
            }
            printf("%d,%d,%d,%lf,%lf\n", m, n, 
                    samples_no, elapsed_sdB, elapsed_new);
        }
    }
}


// *************************

void toy_case()
{
    // TOY CASE TO CHECK CORRECTNESS
    int total_sets = 1;
    int n = 2;
    int m = 2;
    knots[0] = 0.0;
    knots[1] = 0.0;
    knots[2] = 0.0;
    knots[3] = 1.0;
    knots[4] = 3.0;
    knots[5] = 3.0;
    knots[6] = 3.0;
    samples[0] = 0.0;
    samples[1] = 0.5;
    samples[2] = 1.0;
    samples[3] = 2.0;
    samples[4] = 3.0;
    int samples_no = 5;
    int d = 2;
    
    points[0][0][0] = 0.0;
    points[0][0][1] = 0.0;
    points[0][1][0] = 1.0;
    points[0][1][1] = 0.0;
    points[0][2][0] = 1.0;
    points[0][2][1] = 1.0;
    points[0][3][0] = 0.0;
    points[0][3][1] = 1.0;

    eval_bspline_new(n, m, d, total_sets, samples_no);
    many_deBoorCox(n, m, d, total_sets, samples_no);
    smart_deBoorCox(n, m, d, total_sets, samples_no);

    printf("BEZIER COEFFS\n");
    for(int j = 0; j < n; j++)
    {
        for(int i = j - m; i <= j; i++)
        {
            printf("j = %d, i = %d:\n", j, i);
            printf("%1.3f %1.3f %1.3f\n", 
                   coeffs[j][i + m][0], 
                   coeffs[j][i + m][1], 
                   coeffs[j][i + m][2]);
        }
    }

    printf("LEFT = dBC; MID = sdB; RIGHT = new\n");
    for(int i = 0; i < samples_no; i++)
    {
        printf("s(%f) = [%1.3f, %1.3f] = [%1.3f, %1.3f] = [%1.3f, %1.3f]\n", 
                samples[i], result_points[0][i][0], result_points[0][i][1], 
                result_points_spl[0][i][0], result_points_spl[0][i][1], 
                result_points_new[0][i][0], result_points_new[0][i][1]);
    }
}
void do_tests()
{
    int total_sets_candidates[15];
    total_sets_candidates[0] = 1;
    total_sets_candidates[1] = 2;
    total_sets_candidates[2] = 3;
    total_sets_candidates[3] = 4;
    total_sets_candidates[4] = 5;
    total_sets_candidates[5] = 10;
    total_sets_candidates[6] = 15;
    total_sets_candidates[7] = 20;
    total_sets_candidates[8] = 25;
    total_sets_candidates[9] = 30;
    total_sets_candidates[10] = 50;
    total_sets_candidates[11] = 100;
    time_t randtime;
    srand((unsigned) time(&randtime));
    clock_t start_t, end_t;
    printf("d,m,n,N,M,deBoor,spline,new\n");
    for(int d = 1; d <= 3; d++)
    {
        for(int m = 3; m <= 15; m++)
        {
            for(int n = 10; n <= 50; n += 5)
            {
                for(int k = 0; k < 12; k++)
                {
                    int total_sets = total_sets_candidates[k];
                    int samples_no = generate_knots_and_samples(n, m);
        
                    double elapsed_new = 0.0, 
                           elapsed_dBC = 0.0, 
                           elapsed_sdB = 0.0;
                    for(int q = 0; q < 100; q++)
                    {
                        samples_no = generate_knots_and_samples(n, m);
                        generate_points(n, m, d, total_sets);
                        zero_coeffs(n, m);
                        start_t = clock();
                        eval_bspline_new(n, m, d, total_sets, samples_no);
                        end_t = clock();
                        elapsed_new += (double)(end_t - start_t) 
                            / CLOCKS_PER_SEC;

                        start_t = clock();
                        many_deBoorCox(n, m, d, total_sets, samples_no);
                        end_t = clock();
                        elapsed_dBC += (double)(end_t - start_t) 
                            / CLOCKS_PER_SEC;

                        start_t = clock();
                        smart_deBoorCox(n, m, d, total_sets, samples_no);
                        end_t = clock();
                        elapsed_sdB += (double)(end_t - start_t) 
                            / CLOCKS_PER_SEC;
                    }
                    printf("%d,%d,%d,%d,%d,%lf,%lf,%lf\n", d, m, n, samples_no, 
                            total_sets, elapsed_dBC, elapsed_sdB, elapsed_new);
                }
            }
        }
    }
}

void make_latex_table()
{
    // d = 2
    // m = 3, 5, 7, 9, 11
    // n = 20
    // M = 1, 5, 10, 20, 30, 50, 100
    int d = 2;
    int n = 20;

    int total_sets_candidates[7];
    total_sets_candidates[0] = 1;
    total_sets_candidates[1] = 5;
    total_sets_candidates[2] = 10;
    total_sets_candidates[3] = 20;
    total_sets_candidates[4] = 30;
    total_sets_candidates[5] = 50;
    total_sets_candidates[6] = 100;

    time_t randtime;
    srand((unsigned) time(&randtime));
    clock_t start_t, end_t;
    printf("\\begin{tabular}{lcccc}\n");
    printf("$M$ & $m$ & de~Boor-Cox & eval splines & new method \\\\ \\hline\n");
    for(int k = 0; k < 7; k++)
    {
        int total_sets = total_sets_candidates[k];
        for(int m = 3; m <= 11; m += 2)
        {
            double elapsed_new = 0.0, elapsed_dBC = 0.0, elapsed_sdB = 0.0;
            for(int q = 0; q < 100; q++)
            {
                int samples_no = generate_knots_and_samples(n, m);
                generate_points(n, m, d, total_sets);
                zero_coeffs(n, m);
                start_t = clock();
                eval_bspline_new(n, m, d, total_sets, samples_no);
                end_t = clock();
                elapsed_new += (double)(end_t - start_t) / CLOCKS_PER_SEC;

                start_t = clock();
                many_deBoorCox(n, m, d, total_sets, samples_no);
                end_t = clock();
                elapsed_dBC += (double)(end_t - start_t) / CLOCKS_PER_SEC;

                start_t = clock();
                smart_deBoorCox(n, m, d, total_sets, samples_no);
                end_t = clock();
                elapsed_sdB += (double)(end_t - start_t) / CLOCKS_PER_SEC;
            }
            printf("%d & %d & %1.3f & %1.3f & %1.3f \\\\", total_sets, m, 
                    elapsed_dBC, elapsed_sdB, elapsed_new);
            if(m == 11) printf("\\hline\n");
            else printf("\n");
        }
    }
    printf("\\end{tabular}");
}

float get_digits(float a, float b, float max)
{
    if( (a - b) / b < 1e-8) return max;
    float result = -log10((a - b) / b);
    if(result > max) return max;
}

void check_numerical_correctness()
{
    int total_sets_candidates[15];
    total_sets_candidates[0] = 1;
    total_sets_candidates[1] = 2;
    total_sets_candidates[2] = 3;
    total_sets_candidates[3] = 4;
    total_sets_candidates[4] = 5;
    total_sets_candidates[5] = 10;
    total_sets_candidates[6] = 15;
    total_sets_candidates[7] = 20;
    total_sets_candidates[8] = 25;
    total_sets_candidates[9] = 30;
    total_sets_candidates[10] = 50;
    total_sets_candidates[11] = 100;
    time_t randtime;
    srand((unsigned) time(&randtime));
    clock_t start_t, end_t;
    printf("new_mean, spl_mean,NMd\n");
    for(int d = 1; d <= 3; d++)
    {
        for(int m = 3; m <= 15; m++)
        {
            for(int n = 10; n <= 50; n += 5)
            {
                for(int k = 0; k < 12; k++)
                {
                    int total_sets = total_sets_candidates[k];
                    int samples_no = generate_knots_and_samples(n, m);
                    generate_points(n, m, d, total_sets);
                    zero_coeffs(n, m);

                    eval_bspline_new(n, m, d, total_sets, samples_no);
                    many_deBoorCox(n, m, d, total_sets, samples_no);
                    smart_deBoorCox(n, m, d, total_sets, samples_no);
                    float good_digits_new[total_sets][samples_no][d];
                    float good_digits_spl[total_sets][samples_no][d];
                    float new_min = 20.0, new_mean = 0.0, spl_min = 20.0, spl_mean = 0.0;
                    for(int k = 0; k < total_sets; k++)
                    {
                        for(int s = 0; s < samples_no; s++)
                        {
                            for(int dim_c = 0; dim_c < d; dim_c++)
                            {
                                float dBC_r = result_points[k][s][dim_c];
                                float new_r = result_points_new[k][s][dim_c];
                                float spl_r = result_points_spl[k][s][dim_c];
                                    
                                good_digits_new[k][s][dim_c] = get_digits(new_r, dBC_r, 8.0);
                                good_digits_spl[k][s][dim_c] = get_digits(spl_r, dBC_r, 8.0);

                                new_mean += good_digits_new[k][s][dim_c];
                                spl_mean += good_digits_spl[k][s][dim_c];

                                // printf("%1.3f, %1.3f\n", 
                                //         good_digits_new[k][s][dim_c],
                                //         good_digits_spl[k][s][dim_c]);


                                if (good_digits_new[k][s][dim_c] < new_min) 
                                    new_min = good_digits_new[k][s][dim_c];
                                if (good_digits_spl[k][s][dim_c] < spl_min) 
                                    spl_min = good_digits_spl[k][s][dim_c];
                             }
                        }
                    }
                    new_mean /= (total_sets * samples_no * d);
                    spl_mean /= (total_sets * samples_no * d);
                    printf("%1.3f, %1.3f, %d\n", new_mean, spl_mean, 
                            total_sets * samples_no * d);
                }
            }
        }
    }
}

int main()
{
    // toy_case();
    // check_numerical_correctness();
    // make_latex_table();
    do_tests();
    // do_tests_just_splines();
    return 0;
}
