/** \example datastore_statistics.cpp
 * Explains how to use the internal statistics library (see \ref jkqtptools_statistics ) together with JKQTPDatastore to generate advanced plots for 1-dimensional data.
 *
 * \ref JKQTPlotterBasicJKQTPDatastoreStatistics
 */
 
#include <QApplication>
#include "jkqtplotter/jkqtplotter.h"
#include "jkqtplotter/graphs/jkqtppeakstream.h"
#include "jkqtplotter/graphs/jkqtpboxplot.h"
#include "jkqtplotter/graphs/jkqtpstatisticsadaptors.h"
#include "jkqtplotter/graphs/jkqtpevaluatedfunction.h"
#include "jkqtcommon/jkqtpstatisticstools.h"
#include "jkqtcommon/jkqtpstringtools.h"
#include <random>
#include <cmath>


int main(int argc, char* argv[])
{
    QApplication app(argc, argv);


    // 1. create a window with several plotters and get a pointer to the internal datastores (for convenience)
    QWidget mainWidget;
    QGridLayout* lay;
    mainWidget.setLayout(lay=new QGridLayout);
    JKQTPlotter* plot1=new JKQTPlotter(&mainWidget);
    plot1->getPlotter()->setPlotLabel("Histograms and KDE");
    JKQTPDatastore* datastore1=plot1->getDatastore();
    lay->addWidget(plot1,1,0);
    JKQTPlotter* plot1cum=new JKQTPlotter(datastore1, &mainWidget);
    plot1cum->getPlotter()->setPlotLabel("Cummulative Histogram");
    lay->addWidget(plot1cum,1,1);
    JKQTPlotter* plot1kde=new JKQTPlotter(datastore1, &mainWidget);
    plot1kde->getPlotter()->setPlotLabel("Kernel Density Estimate");
    lay->addWidget(plot1kde,0,1);
    JKQTPlotter* plot1box=new JKQTPlotter(datastore1, &mainWidget);
    plot1box->getPlotter()->setPlotLabel("Boxplots");
    lay->addWidget(plot1box,0,0);


    // 2. Now we create two vectors with random values
    //    vector 1: The values are drawn from two different normal distributions d1 and d2,
    //    where for each datapoint the distribution is chosen randomly (by ddecide)
    //    vector 2: same values as in vector 1, if the value is drawn from d1
    //    vector 3: same values as in vector 1, if the value is drawn from d2
    size_t randomdatacol1=datastore1->addColumn("random data 1");
    size_t randomdatacol2=datastore1->addColumn("random data 2");
    size_t randomdatacol3=datastore1->addColumn("random data 3");
    std::random_device rd; // random number generators:
    std::mt19937 gen{rd()};
    std::uniform_int_distribution<> ddecide(0,1);
    std::normal_distribution<> d1{0,1};
    std::normal_distribution<> d2{6,1.2};
    for (size_t i=0; i<150; i++) {
        double v=0;
        const int decide=ddecide(gen);
        if (decide==0) v=d1(gen);
        else v=d2(gen);
        datastore1->appendToColumn(randomdatacol1, v);
        if (decide==0) datastore1->appendToColumn(randomdatacol2, v);
        else datastore1->appendToColumn(randomdatacol3, v);
    }
    QString d1_latex="$\\mathcal{N}("+jkqtp_floattolatexqstr(d1.mean(), 1)+","+jkqtp_floattolatexqstr(d1.stddev(), 1)+")$";
    QString d2_latex="$\\mathcal{N}("+jkqtp_floattolatexqstr(d2.mean(), 1)+","+jkqtp_floattolatexqstr(d2.stddev(), 1)+")$";

    // 3.1. To visualize the data, a simple JKQTPPeakStreamGraph is used:
    JKQTPPeakStreamGraph* gData1;
    plot1box->addGraph(gData1=new JKQTPPeakStreamGraph(plot1box));
    gData1->setDataColumn(randomdatacol1);
    gData1->setBaseline(-0.1);
    gData1->setPeakHeight(-0.05);
    gData1->setDrawBaseline(false);
    // 3.2. We calculate some basic statistics of that column and display it in the graph legend (via the graph title):
    //      Here we use functions of the statistics library for the first time. The statistics library uses an iterator interface
    //      scheme, much like the algorithms of the C++ standard library. Therefore we the iterator interface of JKQTPDatastore
    //      when calling the statistics functions.
    size_t N=0;
    double mean=jkqtpstatAverage(datastore1->begin(randomdatacol1), datastore1->end(randomdatacol1), &N);
    double std=jkqtpstatStdDev(datastore1->begin(randomdatacol1), datastore1->end(randomdatacol1));
    gData1->setTitle(QString("random data $"+d1_latex+"+"+d2_latex+"$: $\\overline{X_1}=%1, \\sigma_{X_1}=%2, N_{X_3}=%3$").arg(jkqtp_floattolatexqstr(mean, 2)).arg(jkqtp_floattolatexqstr(std, 2)).arg(N));

    // 3.3. same as 3.1-3.2, but for the second and thirdcolumn of data:
    JKQTPPeakStreamGraph* gData2;
    plot1box->addGraph(gData2=new JKQTPPeakStreamGraph(plot1box));
    gData2->setDataColumn(randomdatacol2);
    gData2->setBaseline(-0.1);
    gData2->setPeakHeight(0.05);
    gData2->setDrawBaseline(false);
    N=0;
    mean=jkqtpstatAverage(datastore1->begin(randomdatacol2), datastore1->end(randomdatacol2), &N);
    std=jkqtpstatStdDev(datastore1->begin(randomdatacol2), datastore1->end(randomdatacol2));
    gData2->setTitle(QString("random data subset $"+d1_latex+"$: $\\overline{X_2}=%1, \\sigma_{X_3}=%2, N_{X_3}=%3$").arg(jkqtp_floattolatexqstr(mean, 2)).arg(jkqtp_floattolatexqstr(std, 2)).arg(N));
    JKQTPPeakStreamGraph* gData3;
    plot1box->addGraph(gData3=new JKQTPPeakStreamGraph(plot1box));
    gData3->setDataColumn(randomdatacol3);
    gData3->setBaseline(-0.15);
    gData3->setPeakHeight(-0.05);
    gData3->setDrawBaseline(false);
    N=0;
    mean=jkqtpstatAverage(datastore1->begin(randomdatacol3), datastore1->end(randomdatacol3), &N);
    std=jkqtpstatStdDev(datastore1->begin(randomdatacol3), datastore1->end(randomdatacol3));
    gData3->setTitle(QString("random data subset $"+d2_latex+"$: $\\overline{X_3}=%1, \\sigma_{X_3}=%2, N_{X_3}=%3$").arg(jkqtp_floattolatexqstr(mean, 2)).arg(jkqtp_floattolatexqstr(std, 2)).arg(N));


    // 3.4. Now we calculate a 5-Value Summary of the two datasets and use it to plot corresponding boxplots
    //      This can be done by hand, or you can call jkqtpstatAddHBoxplot() which saves some typing. This function
    //      uses jkqtpstat5NumberStatistics() internally to calculate the statistics.
    JKQTPBoxplotHorizontalElement* gBox2=jkqtpstatAddHBoxplot(plot1box->getPlotter(), datastore1->begin(randomdatacol2), datastore1->end(randomdatacol2), -0.25);
    gBox2->setColor(gData2->getKeyLabelColor());
    gBox2->setBoxWidthAbsolute(16);
    JKQTPBoxplotHorizontalElement* gBox3=jkqtpstatAddHBoxplot(plot1box->getPlotter(), datastore1->begin(randomdatacol3), datastore1->end(randomdatacol3), -0.35);
    gBox3->setColor(gData3->getKeyLabelColor());
    gBox3->setBoxWidthAbsolute(16);
    // 3.5. In addition to jkqtpstatAddHBoxplot() there is also jkqtpstatAddHBoxplotAndOutliers(), which generates two graphs:
    //      one JKQTPBoxplotHorizontalElement for the boxplot and one JKQTPSingleColumnSymbolsGraph for the outliers
    //      Note that this function generates additional data columns in the datastore of the provided plotter to represent
    //      the outlier locations.
    //      jkqtpstatAddHBoxplotAndOutliers() calculates the 3% and 97% Quantiles for the boxplots whiskers' ends. You can change that
    //      by supplying other quantiles to the call
    std::pair<JKQTPBoxplotHorizontalElement*,JKQTPSingleColumnSymbolsGraph*> gBox1=jkqtpstatAddHBoxplotAndOutliers(plot1box->getPlotter(), datastore1->begin(randomdatacol1), datastore1->end(randomdatacol1), -0.3);
    //      jkqtpstatAddHBoxplotAndOutliers() calculates the 3% and 97% Quantiles for the boxplots whiskers' ends. You can change that
    //      by supplying other quantiles to the call
    //std::pair<JKQTPBoxplotHorizontalElement*,JKQTPXYLineGraph*> gBox1=jkqtpstatAddHBoxplotAndOutliers(plot1box->getPlotter(), datastore1->begin(randomdatacol1), datastore1->end(randomdatacol1), -0.3,
    //                                                                                                  0.25, 0.75, // 1. and 3. Quartile for the boxplot box
    //                                                                                                  0.05, 0.95 // Quantiles for the boxplot box whiskers' ends
    //                                                                                                  );
    gBox1.first->setColor(gData1->getKeyLabelColor());
    gBox1.second->setColor(gData1->getKeyLabelColor());
    gBox1.second->setSymbolType(JKQTPGraphSymbols::JKQTPCircle);
    gBox1.second->setSymbolSize(7);
    gBox1.first->setBoxWidthAbsolute(16);
    //      the simple alternative would have been:
    //JKQTPBoxplotHorizontalElement* gBox1;
    //plot1box->addGraph(gBox1=jkqtpstatAddHBoxplot(plot1box->getPlotter(), datastore1->begin(randomdatacol1), datastore1->end(randomdatacol1)));
    //gBox1->setPos(-0.3);
    //gBox1->setColor(gData1->getKeyLabelColor());
    //gBox1->setBoxWidthAbsolute(16);




    // 4.1. We repeat the JKQTPPeakStreamGraph visualization from above:
    plot1->addGraph(gData1=new JKQTPPeakStreamGraph(plot1));
    gData1->setDataColumn(randomdatacol1);
    gData1->setBaseline(-0.1);
    gData1->setPeakHeight(-0.05);
    gData1->setDrawBaseline(false);
    gData1->setTitle("random data $"+d1_latex+"+"+d2_latex+"$");

    // 4.2. same as 3.1-3.2, but for the second and thirdcolumn of data:
    plot1->addGraph(gData2=new JKQTPPeakStreamGraph(plot1));
    gData2->setDataColumn(randomdatacol2);
    gData2->setBaseline(-0.1);
    gData2->setPeakHeight(0.05);
    gData2->setDrawBaseline(false);
    gData2->setTitle("random data subset $"+d1_latex+"$");
    plot1->addGraph(gData3=new JKQTPPeakStreamGraph(plot1));
    gData3->setDataColumn(randomdatacol3);
    gData3->setBaseline(-0.15);
    gData3->setPeakHeight(-0.05);
    gData3->setDrawBaseline(false);
    gData3->setTitle("random data subset $"+d2_latex+"$");

    // 4.3. for comparison we add plots of the initial distributions:
    plot1->addGraph(new JKQTPXFunctionLineGraph(std::bind(&jkqtp_gaussdist, std::placeholders::_1, d1.mean(), d1.stddev()), d1_latex, plot1));
    plot1->addGraph(new JKQTPXFunctionLineGraph(std::bind(&jkqtp_gaussdist, std::placeholders::_1, d2.mean(), d2.stddev()), d2_latex, plot1));



    // 4.1. next we calculate a histogram of the data and add a plot to the graph:
    JKQTPBarVerticalGraph* hist1=jkqtpstatAddHHistogram1DAutoranged(plot1->getPlotter(),  datastore1->begin(randomdatacol1), datastore1->end(randomdatacol1), 15);
    //      here the bins are defined by the full range of the data and the bin count (15) is specified
    //      alternatively you could specify the bin width and the number would be calculated automatically:
    //JKQTPBarVerticalGraph* hist1=jkqtpstatAddHHistogram1DAutoranged(plot1->getPlotter(),  datastore1->begin(randomdatacol1), datastore1->end(randomdatacol1), 0.5);
    //      a third option is to define the bins via a vector of values (lower borders):
    //std::vector<double> bins{-2,-1.5,-1,-0.75,-0.5,-0.25,0,0.25,0.5,0.75,1,1.5,2,2.5,3,4,5,6,7,8,9,10};
    //JKQTPBarVerticalGraph* hist1=jkqtpstatAddHHistogram1D(plot1->getPlotter(),  datastore1->begin(randomdatacol1), datastore1->end(randomdatacol1), bins.begin(), bins.end());
    hist1->setColor(QColorWithAlphaF(gData1->getKeyLabelColor(), 0.5)); // use same color as gData1, but with alpha set to 0.5 (50% transparency)




    // 5.1. instead of histograms, it can also make sense to calculate Kernel Density Estimates, especially when only few datapoints are available.
    //      To demonstrate this, we first calculate take a subset of the values in randomdatacol1 as a small test dataset.
    size_t randomdatacol1subset=datastore1->copyColumn(randomdatacol1, 1, 7, "subset of "+datastore1->getColumnName(randomdatacol1));
    JKQTPPeakStreamGraph* gData2kde;
    plot1kde->addGraph(gData2kde=new JKQTPPeakStreamGraph(plot1kde));
    gData2kde->setDataColumn(randomdatacol1subset);
    gData2kde->setBaseline(-0.05);
    gData2kde->setPeakHeight(-0.1);
    gData2kde->setDrawBaseline(false);
    gData2kde->setTitle("data");
    //      first we plot the histogram of this dataset, with 0.5 bin width:
    JKQTPBarVerticalGraph* hist1kde=jkqtpstatAddHHistogram1DAutoranged(plot1kde->getPlotter(),  datastore1->begin(randomdatacol1subset), datastore1->end(randomdatacol1subset), 0.5);
    hist1kde->setTitle("histogram");
    hist1kde->setColor(QColorWithAlphaF(gData2kde->getKeyLabelColor(), 0.25)); // use same color as gData1, but with alpha set to 0.5 (50% transparency)
    // 5.2. now we first extimate the bandwidth:
    double kdeBandwidth=jkqtpstatEstimateKDEBandwidth(datastore1->begin(randomdatacol1subset), datastore1->end(randomdatacol1subset));
    //      and generate a vector of positions, where we want to evaluate the KDE:
    std::vector<double> xKDE;
    for (double x=-5; x<=10; x+=0.01) xKDE.push_back(x);
    //      now the KDE can be added (gaussian kernel, evaluated at the positions in xKDE):
    JKQTPXYLineGraph* kde1=jkqtpstatAddHKDE1D(plot1kde->getPlotter(),  datastore1->begin(randomdatacol1subset), datastore1->end(randomdatacol1subset),
    //                                                  evaluate at locations in xKDE
                                                        xKDE.begin(), xKDE.end(),
    //                                                  use a gaussian kernel
                                                        &jkqtpstatKernel1DGaussian,
    //                                                  estimate the bandwidth
                                                        kdeBandwidth);
    kde1->setTitle("KDE, gaussian, $\\mbox{BW}="+jkqtp_floattolatexqstr(kdeBandwidth, 3)+"$");
    JKQTPXYLineGraph* kde11=jkqtpstatAddHKDE1D(plot1kde->getPlotter(),  datastore1->begin(randomdatacol1subset), datastore1->end(randomdatacol1subset),
    //                                                  evaluate at locations in xKDE
                                                        xKDE.begin(), xKDE.end(),
    //                                                  use a gaussian kernel
                                                        &jkqtpstatKernel1DGaussian,
    //                                                  a very small bandwidth
                                                        0.1);
    kde11->setTitle("KDE, gaussian, $\\mbox{BW}="+jkqtp_floattolatexqstr(0.1, 3)+"$");
    //      here a second KDE with a different kernel (Epanechnikov) and the range of evaluation positions defined via three numbers:
    JKQTPXYLineGraph* kde2=jkqtpstatAddHKDE1D(plot1kde->getPlotter(),  datastore1->begin(randomdatacol1subset), datastore1->end(randomdatacol1subset),
    //                                                  evaluate at locations between -5 and 10, in steps of 0.01 (equivalent to the line above, but without pre-calculating a vector)
                                                        -5.0,0.01,10.0,
    //                                                  use a gaussian kernel
                                                        &jkqtpstatKernel1DEpanechnikov,
    //                                                  estimate the bandwidth
                                                        kdeBandwidth);
    kde2->setTitle("KDE, epanechnikov, $\\mbox{BW}="+jkqtp_floattolatexqstr(kdeBandwidth, 3)+"$");
    kde1->setColor(QColorWithAlphaF(gData2kde->getKeyLabelColor(), 0.5)); // use same color as gData1, but with alpha set to 0.5 (50% transparency)
    // 5.3. for comparison we add plots of the initial distributions:
    plot1kde->addGraph(new JKQTPXFunctionLineGraph(std::bind(&jkqtp_gaussdist, std::placeholders::_1, d1.mean(), d1.stddev()), d1_latex, plot1));
    plot1kde->addGraph(new JKQTPXFunctionLineGraph(std::bind(&jkqtp_gaussdist, std::placeholders::_1, d2.mean(), d2.stddev()), d2_latex, plot1));



    // 6.1. now we calculate a cummulative histogram:
    JKQTPPeakStreamGraph* gData2com;
    plot1cum->addGraph(gData2com=new JKQTPPeakStreamGraph(plot1cum));
    gData2com->setDataColumn(randomdatacol2);
    gData2com->setBaseline(-1);
    gData2com->setPeakHeight(-10);
    gData2com->setDrawBaseline(false);
    JKQTPBarVerticalGraph* histcum2=jkqtpstatAddHHistogram1DAutoranged(plot1cum->getPlotter(),  datastore1->begin(randomdatacol2), datastore1->end(randomdatacol2),
    //                                                                          bin width
                                                                                0.1,
    //                                                                          normalized, cummulative
                                                                                false,       true);
    histcum2->setColor(QColorWithAlphaF(gData2com->getKeyLabelColor(), 0.2)); // use same color as gData1, but with alpha set to 0.5 (50% transparency)
    // 6.2. also a kernel density estimate can be accumulated:
    JKQTPXYLineGraph* kdecum2=jkqtpstatAddHKDE1D(plot1cum->getPlotter(),  datastore1->begin(randomdatacol2), datastore1->end(randomdatacol2),
    //                                                  evaluate at locations between -3.5 and 3.5, in steps of 0.01
                                                        -3.5,0.01,3.5,
    //                                                  use a uniform/box kernel
                                                        &jkqtpstatKernel1DUniform,
    //                                                  estimate the bandwidth
                                                        jkqtpstatEstimateKDEBandwidth(datastore1->begin(randomdatacol2), datastore1->end(randomdatacol2)),
    //                                                  cummulative KDE:
                                                        true);
    kdecum2->setColor(gData2com->getKeyLabelColor()); // use same color as gData1, but with alpha set to 0.5 (50% transparency)


    // autoscale the plot so the graph is contained
    plot1->zoomToFit();
    plot1->setGrid(false);
    plot1->getXAxis()->setShowZeroAxis(false);
    plot1->getPlotter()->setKeyBackgroundColor(QColorWithAlphaF("white", 0.25), Qt::SolidPattern);
    plot1->setY(-0.25, 0.45);
    plot1cum->zoomToFit();
    plot1cum->setGrid(false);
    plot1cum->getXAxis()->setShowZeroAxis(false);
    plot1cum->getPlotter()->setKeyBackgroundColor(QColorWithAlphaF("white", 0.25), Qt::SolidPattern);
    plot1kde->zoomToFit();
    plot1kde->setGrid(false);
    plot1kde->getXAxis()->setShowZeroAxis(false);
    plot1kde->getPlotter()->setKeyBackgroundColor(QColorWithAlphaF("white", 0.25), Qt::SolidPattern);
    plot1kde->setY(-0.155, 0.45);
    plot1box->zoomToFit();
    plot1box->setGrid(false);
    plot1box->getXAxis()->setShowZeroAxis(false);
    plot1box->getPlotter()->setKeyBackgroundColor(QColorWithAlphaF("white", 0.25), Qt::SolidPattern);
    plot1box->setY(-0.4, 0.0);

    // show plotter and make it a decent size
    mainWidget.show();
    mainWidget.resize(1200,800);

    return app.exec();
}