added weighted sum of deviations (chi-square) and coefficient of determination (R^2) to statistics library

added log regression model to statistics library
added output of R^2 and chi^2 to regression adaptors (output in graph label)
bugfixed some documentation typos
This commit is contained in:
jkriege2 2019-06-02 14:08:04 +02:00
parent 09237a3d55
commit ad38ac47f2
8 changed files with 203 additions and 59 deletions

View File

@ -72,21 +72,21 @@ All statistics functions use all values in the given range and convert each valu
\defgroup jkqtptools_math_statistics_1dhist 1-dimensional Histograms
\ingroup jkqtptools_math_statistics
\defgroup jkqtptools_math_statistics_1dhist_kernels Kernels for 1-dimensional Histograms
\ingroup jkqtptools_math_statistics_1dhist
\defgroup jkqtptools_math_statistics_2dhist 2-dimensional Histograms
\ingroup jkqtptools_math_statistics
\defgroup jkqtptools_math_statistics_2dhist_kernels Kernels for 2-dimensional Histograms
\ingroup jkqtptools_math_statistics_2dhist
\defgroup jkqtptools_math_statistics_1dkde 1-dimensional Kernel Density Estimates
\ingroup jkqtptools_math_statistics
\defgroup jkqtptools_math_statistics_1dkde_kernels Kernels for 1-dimensional Histograms
\ingroup jkqtptools_math_statistics_1dkde
\defgroup jkqtptools_math_statistics_2dkde 2-dimensional Kernel Density Estimates
\ingroup jkqtptools_math_statistics
\defgroup jkqtptools_math_statistics_2dkde_kernels Kernels for 2-dimensional Histograms
\ingroup jkqtptools_math_statistics_2dkde
\defgroup jkqtptools_math_statistics_adaptors Statistics To Plot Adaptors
\ingroup jkqtptools_math_statistics

View File

@ -189,7 +189,10 @@ Again these two steps can be simplified using an "adaptor":
datastore1->begin(colWLinE), datastore1->end(colWLinE),
&coeffA, &coeffB, false, false,
&jkqtp_inversePropSaveDefault<double>);
```... or even shorter:
```
... or even shorter:
```.cpp
jkqtpstatAddLinearWeightedRegression(graphD);
```
@ -208,8 +211,9 @@ which performs a simple non-weighted regression. The difference between the two
# Linearizable Regression Models
In addition to the simple linear regression model `f(x)=a+b*x`, it is also possible to fit a few non-linear models by transforming the data:
- power-law function`f(x)=a*x^b`, which is a linear function in a log-log-plot
- exponential function `f(x)=a*exp(b*x)`, which is a linear function in a semi-log-plot
- power-law function`f(x)=a*x^b`, which is a linear function in a log(x)-log(y)-plot
- exponential function `f(x)=a*exp(b*x)`, which is a linear function in a x-log(y)-plot
- logarithm function `f(x)=a+b*ln(x)`, which is a linear function in a exp(x)-y-plot
The available models are defined in the enum `JKQTPStatRegressionModelType`. And there exists a function `jkqtpStatGenerateRegressionModel()`, which returns a C++-functor representing the function.
To demonstrate these fitting options, we first generate data from an exponential and a power-law model. Note that we also add normally distributed errors, but in order to ensure that we do not obtain y-values <0, we use loops that draw normally distributed random numbers, until this condition is met:
@ -234,11 +238,11 @@ To demonstrate these fitting options, we first generate data from an exponential
ypow=model_powerlaw(x, a0_powerlaw, b0_powerlaw)+d1(gen);
}
datastore1->appendToColumn(colNLLinYPow, ypow);
double yexp=model_exp(x, a0_powerlaw, b0_powerlaw)+d1(gen);
double yexp=model_exp(x, a0_exp, b0_exp)+d1(gen);
while (yexp<0) {
yexp=model_exp(x, a0_powerlaw, b0_powerlaw)+d1(gen);
yexp=model_exp(x, a0_exp, b0_exp)+d1(gen);
}
datastore1->appendToColumn(colNLLinYExp, model_exp(x, a0_exp, b0_exp)+d1(gen));
datastore1->appendToColumn(colNLLinYExp, yexp);
}
```
@ -284,6 +288,7 @@ Of course also "adaptors" exist that allow to perform the steps above in a singl
jkqtpstatAddRegression(plot5->getPlotter(), JKQTPStatRegressionModelType::PowerLaw, datastore1->begin(colNLLinX), datastore1->end(colNLLinX), datastore1->begin(colNLLinYPow), datastore1->end(colNLLinYPow));
```
... or even shorter:
```.cpp
jkqtpstatAddRegression(graphD_exp, JKQTPStatRegressionModelType::Exponential);
jkqtpstatAddRegression(graphD_powerlaw, JKQTPStatRegressionModelType::PowerLaw);

View File

@ -206,11 +206,15 @@ int main(int argc, char* argv[])
double b0_powerlaw=0.25;
double a0_exp=5;
double b0_exp=0.5;
double a0_log=0;
double b0_log=1;
size_t colNLLinX=datastore1->addColumn("non-lin data, x");
size_t colNLLinYExp=datastore1->addColumn("non-lin data, y, exponential model");
size_t colNLLinYPow=datastore1->addColumn("non-lin data, y, power-law model");
size_t colNLLinYLog=datastore1->addColumn("non-lin data, y, log-law model");
auto model_powerlaw=jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType::PowerLaw);
auto model_exp=jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType::Exponential);
auto model_log=jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType::Logarithm);
for (double x=0.1; x<=10; x+=0.5) {
datastore1->appendToColumn(colNLLinX, x);
double ypow=model_powerlaw(x, a0_powerlaw, b0_powerlaw)+d1(gen);
@ -218,11 +222,12 @@ int main(int argc, char* argv[])
ypow=model_powerlaw(x, a0_powerlaw, b0_powerlaw)+d1(gen);
}
datastore1->appendToColumn(colNLLinYPow, ypow);
double yexp=model_exp(x, a0_powerlaw, b0_powerlaw)+d1(gen);
double yexp=model_exp(x, a0_exp, b0_exp)+d1(gen);
while (yexp<0) {
yexp=model_exp(x, a0_powerlaw, b0_powerlaw)+d1(gen);
yexp=model_exp(x, a0_exp, b0_exp)+d1(gen);
}
datastore1->appendToColumn(colNLLinYExp, model_exp(x, a0_exp, b0_exp)+d1(gen));
datastore1->appendToColumn(colNLLinYExp, yexp);
datastore1->appendToColumn(colNLLinYLog, model_log(x, a0_log, b0_log));
}
// we visualize this data with a simple scatter graphs:
JKQTPXYLineGraph* graphD_powerlaw;
@ -235,6 +240,11 @@ int main(int argc, char* argv[])
graphD_exp->setXYColumns(colNLLinX, colNLLinYExp);
graphD_exp->setDrawLine(false);
graphD_exp->setTitle(QString("data $%1+\\mathcal{N}(0,1)$").arg(jkqtpstatRegressionModel2Latex(JKQTPStatRegressionModelType::Exponential, a0_exp, b0_exp)));
JKQTPXYLineGraph* graphD_log;
plot5->addGraph(graphD_log=new JKQTPXYLineGraph(plot5));
graphD_log->setXYColumns(colNLLinX, colNLLinYLog);
graphD_log->setDrawLine(false);
graphD_log->setTitle(QString("data $%1+\\mathcal{N}(0,1)$").arg(jkqtpstatRegressionModel2Latex(JKQTPStatRegressionModelType::Logarithm, a0_log, b0_log)));
// 5.2. Now we calculate the regression models and add a plot to the graph:
double cA=0, cB=0;
JKQTPXFunctionLineGraph* gFunc;
@ -253,6 +263,7 @@ int main(int argc, char* argv[])
//jkqtpstatAddRegression(plot5->getPlotter(), JKQTPStatRegressionModelType::PowerLaw, datastore1->begin(colNLLinX), datastore1->end(colNLLinX), datastore1->begin(colNLLinYPow), datastore1->end(colNLLinYPow));
//jkqtpstatAddRegression(graphD_exp, JKQTPStatRegressionModelType::Exponential);
//jkqtpstatAddRegression(graphD_powerlaw, JKQTPStatRegressionModelType::PowerLaw);
jkqtpstatAddRegression(graphD_log, JKQTPStatRegressionModelType::Logarithm);

View File

@ -110,15 +110,17 @@ std::function<double (double, double, double)> jkqtpStatGenerateRegressionModel(
case JKQTPStatRegressionModelType::Linear: return [](double x, double a, double b)->double { return a+b*x; };
case JKQTPStatRegressionModelType::PowerLaw: return [](double x, double a, double b)->double { return a*pow(x,b); };
case JKQTPStatRegressionModelType::Exponential: return [](double x, double a, double b)->double { return a*exp(b*x); };
case JKQTPStatRegressionModelType::Logarithm: return [](double x, double a, double b)->double { return a+b*log(x); };
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateRegressionModel()");
}
QString jkqtpstatRegressionModel2Latex(JKQTPStatRegressionModelType type, double a, double b) {
switch(type) {
case JKQTPStatRegressionModelType::Linear: return QString("f(x)=%1+%2{\\cdot}x").arg(jkqtp_floattolatexqstr(a, 3)).arg(jkqtp_floattolatexqstr(b, 3));
case JKQTPStatRegressionModelType::Linear: return QString("f(x)=%1%2{\\cdot}x").arg(jkqtp_floattolatexqstr(a, 2, true, 1e-16,1e-2, 1e4,false)).arg(jkqtp_floattolatexqstr(b, 2, true, 1e-16,1e-2, 1e4,true));
case JKQTPStatRegressionModelType::PowerLaw: return QString("f(x)=%1{\\cdot}x^{%2}").arg(jkqtp_floattolatexqstr(a, 3)).arg(jkqtp_floattolatexqstr(b, 3));
case JKQTPStatRegressionModelType::Exponential: return QString("f(x)=%1{\\cdot}\\exp(%2{\\cdot}x)").arg(jkqtp_floattolatexqstr(a, 3)).arg(jkqtp_floattolatexqstr(b, 3));
case JKQTPStatRegressionModelType::Logarithm: return QString("f(x)=%1%2{\\cdot}\\ln(x)").arg(jkqtp_floattolatexqstr(a, 2, true, 1e-16,1e-2, 1e4,false)).arg(jkqtp_floattolatexqstr(b, 2, true, 1e-16,1e-2, 1e4,true));
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpstatRegressionModel2Latex()");
}
@ -130,11 +132,13 @@ std::function<double (double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegress
std::pair<std::function<double (double)>, std::function<double (double)> > jkqtpStatGenerateTransformation(JKQTPStatRegressionModelType type) {
auto logF=[](double x)->double { return log(x); };
auto expF=[](double x)->double { return exp(x); };
auto idF=&jkqtp_identity<double>;
switch(type) {
case JKQTPStatRegressionModelType::Linear: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::PowerLaw: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, logF);
case JKQTPStatRegressionModelType::Exponential: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, logF);
case JKQTPStatRegressionModelType::Logarithm: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, idF);
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateTransformation()");
}
@ -147,8 +151,9 @@ std::pair<std::function<double (double)>, std::function<double (double)> > jkqtp
case JKQTPStatRegressionModelType::Linear: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::PowerLaw: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, expF);
case JKQTPStatRegressionModelType::Exponential: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, expF);
case JKQTPStatRegressionModelType::Logarithm: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateTransformation()");
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateParameterATransformation()");
}
std::pair<std::function<double (double)>, std::function<double (double)> > jkqtpStatGenerateParameterBTransformation(JKQTPStatRegressionModelType type) {
@ -159,6 +164,7 @@ std::pair<std::function<double (double)>, std::function<double (double)> > jkqtp
case JKQTPStatRegressionModelType::Linear: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::PowerLaw: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::Exponential: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::Logarithm: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateTransformation()");
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateParameterBTransformation()");
}

View File

@ -1800,7 +1800,7 @@ inline void jkqtpstatKDE1D(InputIt first, InputIt last, double binXLeft, double
/*! \brief calculate the linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is \f$ f(x)=a+b\cdot x \f$
So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{arg\;min}\limits_{a,b}\sum\limits_i\left(y_i-(a+b\cdot x_i)\right)^2 \f]
So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-(a+b\cdot x_i)\right)^2 \f]
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
@ -1857,7 +1857,7 @@ inline void jkqtpstatLinearRegression(InputItX firstX, InputItX lastX, InputItY
/*! \brief calculate the weighted linear regression coefficients for a given for a given data range \a firstX / \a firstY / \a firstW ... \a lastX / \a lastY / \a lastW where the model is \f$ f(x)=a+b\cdot x \f$
So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{arg\;min}\limits_{a,b}\sum\limits_iw_i^2\cdot\left(y_i-(a+b\cdot x_i)\right)^2 \f]
So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_iw_i^2\cdot\left(y_i-(a+b\cdot x_i)\right)^2 \f]
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
@ -1937,7 +1937,7 @@ inline void jkqtpstatLinearWeightedRegression(InputItX firstX, InputItX lastX, I
/*! \brief calculate the (robust) iteratively reweighted least-squares (IRLS) estimate for the parameters of the model \f$ f(x)=a+b\cdot x \f$
for a given data range \a firstX / \a firstY ... \a lastX / \a lastY
So this function finds an outlier-robust solution to the optimization problem:
\f[ (a^\ast,b^\ast)=\mathop{arg\;min}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
\f[ (a^\ast,b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
\ingroup jkqtptools_math_statistics_regression
\ingroup jkqtptools_math_statistics_regression
@ -1957,16 +1957,16 @@ inline void jkqtpstatLinearWeightedRegression(InputItX firstX, InputItX lastX, I
This is a simple form of the IRLS algorithm to estimate the parameters a and b in a linear model \f$ f(x)=a+b\cdot x \f$.
This algorithm solves the optimization problem for a \f$ L_p\f$-norm:
\f[ (a^\ast,b^\ast)=\mathop{arg\;min}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
\f[ (a^\ast,b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
by iteratively optimization weights \f$ \vec{w} \f$ and solving a weighted least squares problem in each iteration:
\f[ (a_n,b_n)=\mathop{arg\;min}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^{(p-2)}\cdot|a+b\cdot x_i-y_i|^2 \f]
\f[ (a_n,b_n)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^{(p-2)}\cdot|a+b\cdot x_i-y_i|^2 \f]
The IRLS-algorithm works as follows:
- calculate initial \f$ a_0\f$ and \f$ b_0\f$ with unweighted regression from x and y
- perform a number of iterations (parameter \a iterations ). In each iteration \f$ n\f$:
- calculate the error vector \f$\vec{e}\f$: \f[ e_i = a+b\cdot x_i -y_i \f]
- estimate new weights \f$\vec{w}\f$: \[ w_i=|e_i|^{(p-2)/2} \f]
- estimate new weights \f$\vec{w}\f$: \f[ w_i=|e_i|^{(p-2)/2} \f]
- calculate new estimates \f$ a_n\f$ and \f$ b_n\f$ with weighted regression from \f$ \vec{x}\f$ and \f$ \vec{y}\f$ and \f$ \vec{w}\f$
.
- return the last estimates \f$ a_n\f$ and \f$ b_n\f$
@ -2024,6 +2024,7 @@ enum class JKQTPStatRegressionModelType {
Linear, /*!< \brief linear model \f$ f(x)=a+b\cdot x \f$ */
PowerLaw, /*!< \brief power law model \f$ f(x)=a\cdot x^b \f$ */
Exponential, /*!< \brief exponential model \f$ f(x)=a\cdot \exp(b\cdot x) \f$ */
Logarithm, /*!< \brief exponential model \f$ f(x)=a+b\cdot \ln(x) \f$ */
};
@ -2064,7 +2065,7 @@ JKQTP_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(do
/*! \brief calculate the linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{arg\;min}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^2 \f]
So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^2 \f]
by reducing it to a linear fit by transforming x- and/or y-data
\ingroup jkqtptools_math_statistics_regression
@ -2108,7 +2109,7 @@ inline void jkqtpstatRegression(JKQTPStatRegressionModelType type, InputItX firs
/*! \brief calculate the robust linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
So this function solves the Lp-norm optimization problem: \f[ (a^\ast, b^\ast)=\mathop{arg\;min}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^p \f]
So this function solves the Lp-norm optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^p \f]
by reducing it to a linear fit by transforming x- and/or y-data
\ingroup jkqtptools_math_statistics_regression
@ -2155,7 +2156,7 @@ inline void jkqtpstatRobustIRLSRegression(JKQTPStatRegressionModelType type, Inp
/*! \brief calculate the robust linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
So this function solves the Lp-norm optimization problem: \f[ (a^\ast, b^\ast)=\mathop{arg\;min}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^p \f]
So this function solves the Lp-norm optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^p \f]
by reducing it to a linear fit by transforming x- and/or y-data
\ingroup jkqtptools_math_statistics_regression
@ -2395,11 +2396,124 @@ QString jkqtpstatPolynomialModel2Latex(PolyItP firstP, PolyItP lastP) {
/*! \brief calculates the coefficient of determination \f$ R^2 \f$ for a set of measurements \f$ (x_i,y_i) \f$ with a fit function \f$ f(x) \f$
\ingroup jkqtptools_math_statistics_poly
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param f function \f$ f(x) \f$, result of a fit to the data
\return coeffcicient of determination \f[ R^2=1-\frac{\sum_i\bigl[y_i-f(x_i)\bigr]^2}{\sum_i\bigl[y_i-\overline{y}\bigr]^2} \f] where \f[ \overline{y}=\frac{1}{N}\cdot\sum_iy_i \f]
\see https://en.wikipedia.org/wiki/Coefficient_of_determination
*/
template <class InputItX, class InputItY>
inline double jkqtpstatCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function<double(double)> f) {
auto itX=firstX;
auto itY=firstY;
const double yMean=jkqtpstatAverage(firstX,lastX);
double SSres=0;
double SStot=0;
for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
SStot+=jkqtp_sqr(fit_y-yMean);
SSres+=jkqtp_sqr(fit_y-f(fit_x));
}
}
return 1.0-SSres/SStot;
}
/*! \brief calculates the sum of deviations \f$ \chi^2 \f$ for a set of measurements \f$ (x_i,y_i) \f$ with a fit function \f$ f(x) \f$
\ingroup jkqtptools_math_statistics_poly
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param f function \f$ f(x) \f$, result of a fit to the data
\return sum of deviations \f[ \chi^2=\sum_i\bigl[y_i-f(x_i)\bigr]^2 \f]
\see https://en.wikipedia.org/wiki/Coefficient_of_determination
*/
template <class InputItX, class InputItY>
inline double jkqtpstatSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function<double(double)> f) {
auto itX=firstX;
auto itY=firstY;
double SSres=0;
for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
SSres+=jkqtp_sqr(fit_y-f(fit_x));
}
}
return SSres;
}
/*! \brief calculates the weighted sum of deviations \f$ \chi^2 \f$ for a set of measurements \f$ (x_i,y_i,w_i) \f$ with a fit function \f$ f(x) \f$
\ingroup jkqtptools_math_statistics_poly
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\tparam InputItW standard iterator type of \a firstW and \a lastW.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
\param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
\param f function \f$ f(x) \f$, result of a fit to the data
\param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weight, i.e. \c wi=fWeightDataToWi(*itW)
e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y, has a large weight, if it's error is small and in the case if jkqtp_identity() it's weight
is directly proportional to the given value.
\return weighted sum of deviations \f[ \chi^2=\sum_iw_i^2\cdot\bigl[y_i-f(x_i)\bigr]^2 \f]
\see https://en.wikipedia.org/wiki/Reduced_chi-squared_statistic
*/
template <class InputItX, class InputItY, class InputItW>
inline double jkqtpstatWeightedSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function<double(double)> f, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
auto itX=firstX;
auto itY=firstY;
auto itW=firstW;
double SSres=0;
for (; itX!=lastX && itY!=lastY && itW!=lastW; ++itX, ++itY, ++itW) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
const double fit_w2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*itW)));
if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y) && JKQTPIsOKFloat(fit_w2)) {
SSres+=fit_w2*jkqtp_sqr(fit_y-f(fit_x));
}
}
return SSres;
}

View File

@ -212,38 +212,46 @@ std::string jkqtp_tolower(const std::string& s){
return res;
}
std::string jkqtp_floattolatexstr(double data, int past_comma, bool remove_trail0, double belowIsZero, double minNoExponent, double maxNoExponent){
if ((belowIsZero>0) && (fabs(data)<belowIsZero)) return "\\rm{0}";
if (data==0) return "\\rm{0}";
std::string jkqtp_floattolatexstr(double data, int past_comma, bool remove_trail0, double belowIsZero, double minNoExponent, double maxNoExponent, bool ensurePlusMinus){
if ((belowIsZero>0) && (fabs(data)<belowIsZero)) {
if (ensurePlusMinus) return "+\\rm{0}";
else return "\\rm{0}";
}
if (fabs(data)<5*std::numeric_limits<double>::epsilon()) {
if (ensurePlusMinus) return "+\\rm{0}";
else return "\\rm{0}";
}
double adata=fabs(data);
std::string res=jkqtp_floattostr(data, past_comma, remove_trail0);
/*std::string form="%."+inttostr(past_comma)+"lf";
std::string res=jkqtp_format(form,data);
std::string s="";
if (data<0) s="-";*/
long exp=(long)floor(log(adata)/log(10.0));
//std::cout<<"data="<<data<<" res="<<res<<" exp="<<exp<<" past_comma="<<past_comma<<std::endl;
//if (exp==0 || exp==-1 || exp==1) return res;
if ((minNoExponent<=fabs(data)) && (fabs(data)<=maxNoExponent)) return res;
//if ((-past_comma<exp) && (exp<past_comma)) return res;
//std::cout<<"adata="<<adata<<" log(adata)/log(10)="<<log(adata)/log(10.0)<<" exp="<<exp<<" adata/pow(10, exp)="<<adata/pow(10.0, (double)exp)<<"\n";
std::string v=jkqtp_floattostr(data/pow(10.0, static_cast<double>(exp)), past_comma, remove_trail0);
//std::cout<<"floattolatexstr: v="<<v<<" exp="<<exp<<std::endl;
if (v!="1" && v!="10") return v+std::string("{\\times}10^{")+jkqtp_inttostr(exp)+"}";
if (v=="10") exp=exp+1;
return std::string("10^{")+jkqtp_inttostr(exp)+"}";
long exp=static_cast<long>(floor(log(adata)/log(10.0)));
if ((minNoExponent>fabs(data)) || (fabs(data)>maxNoExponent)) {
std::string v=jkqtp_floattostr(data/pow(10.0, static_cast<double>(exp)), past_comma, remove_trail0);
if (v!="1" && v!="10") {
res=v+std::string("{\\times}10^{")+jkqtp_inttostr(exp)+"}";
} else {
if (v=="10") exp=exp+1;
res=std::string("10^{")+jkqtp_inttostr(exp)+"}";
}
}
if (ensurePlusMinus && res.size()>0) {
if (res[0]!='-' && res[0]!='+') {
if (data<0) res="-"+res;
else res="+"+res;
}
}
return res;
}
std::string jkqtp_floattohtmlstr(double data, int past_comma, bool remove_trail0, double belowIsZero, double minNoExponent, double maxNoExponent){
std::string result;
if ((belowIsZero>0) && (fabs(data)<belowIsZero)) return "0";
if (data==0) return "0";
if (fabs(data)<5*std::numeric_limits<double>::epsilon()) return "0";
double adata=fabs(data);
std::string res=jkqtp_floattostr(data, past_comma, remove_trail0);
long exp=(long)floor(log(adata)/log(10.0));
long exp=static_cast<long>(floor(log(adata)/log(10.0)));
if ((minNoExponent<=fabs(data)) && (fabs(data)<maxNoExponent)) return res;
//if ((-past_comma<exp) && (exp<past_comma)) result= res;
else {
@ -568,9 +576,9 @@ QString jkqtp_floattounitqstr(double data, int past_comma, bool remove_trail0)
return QString::fromStdString(jkqtp_floattounitstr(data, past_comma, remove_trail0));
}
QString jkqtp_floattolatexqstr(double data, int past_comma, bool remove_trail0, double belowIsZero, double minNoExponent, double maxNoExponent)
QString jkqtp_floattolatexqstr(double data, int past_comma, bool remove_trail0, double belowIsZero, double minNoExponent, double maxNoExponent, bool ensurePlusMinus)
{
return QString::fromStdString(jkqtp_floattolatexstr(data, past_comma, remove_trail0, belowIsZero, minNoExponent, maxNoExponent));
return QString::fromStdString(jkqtp_floattolatexstr(data, past_comma, remove_trail0, belowIsZero, minNoExponent, maxNoExponent, ensurePlusMinus));
}
QString jkqtp_floattohtmlqstr(double data, int past_comma, bool remove_trail0, double belowIsZero, double minNoExponent, double maxNoExponent)

View File

@ -163,7 +163,7 @@ JKQTP_LIB_EXPORT std::string jkqtp_floattounitstr(double data, int past_comma=5,
/** \brief convert a double to a string, encoding powers of ten as exponent in LaTeX notation (e.g. <code>-1.23\\cdot 10^{-5}</code>)
* \ingroup jkqtptools_string
*/
JKQTP_LIB_EXPORT std::string jkqtp_floattolatexstr(double data, int past_comma=5, bool remove_trail0=false, double belowIsZero=1e-16, double minNoExponent=1e-3, double maxNoExponent=1e4);
JKQTP_LIB_EXPORT std::string jkqtp_floattolatexstr(double data, int past_comma=5, bool remove_trail0=false, double belowIsZero=1e-16, double minNoExponent=1e-3, double maxNoExponent=1e4, bool ensurePlusMinus=false);
/** \brief convert a double to a string, encoding powers of ten as exponent with HTML tags
* \ingroup jkqtptools_string
*/
@ -176,7 +176,7 @@ JKQTP_LIB_EXPORT QString jkqtp_floattounitqstr(double data, int past_comma=5, bo
/** \brief convert a double to a string, encoding powers of ten as exponent in LaTeX notation (e.g. <code>-1.23\\cdot 10^{-5}</code>)
* \ingroup jkqtptools_string
*/
JKQTP_LIB_EXPORT QString jkqtp_floattolatexqstr(double data, int past_comma=5, bool remove_trail0=false, double belowIsZero=1e-16, double minNoExponent=1e-3, double maxNoExponent=1e4);
JKQTP_LIB_EXPORT QString jkqtp_floattolatexqstr(double data, int past_comma=5, bool remove_trail0=false, double belowIsZero=1e-16, double minNoExponent=1e-3, double maxNoExponent=1e4, bool ensurePlusMinus=false);
/** \brief convert a double to a string, encoding powers of ten as exponent with HTML tags
* \ingroup jkqtptools_string
*/

View File

@ -689,7 +689,7 @@ inline JKQTPXFunctionLineGraph* jkqtpstatAddLinearRegression(JKQTBasePlotter* pl
JKQTPXFunctionLineGraph* g=new JKQTPXFunctionLineGraph(plotter);
g->setSpecialFunction(JKQTPXFunctionLineGraph::SpecialFunction::Line);
g->setParamsV(cA, cB);
g->setTitle(QString("regression: $f(x) = %1 + %2 \\cdot x$").arg(jkqtp_floattolatexqstr(cA)).arg(jkqtp_floattolatexqstr(cB)));
g->setTitle(QString("regression: $f(x) = %1%2{\\cdot}x, \\chi^2=%4, R^2=%3$").arg(jkqtp_floattolatexqstr(cA, 2, true, 1e-16,1e-2, 1e4,false)).arg(jkqtp_floattolatexqstr(cB, 2, true, 1e-16,1e-2, 1e4,true)).arg(jkqtp_floattolatexqstr(jkqtpstatCoefficientOfDetermination(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType::Linear, cA, cB)),3)).arg(jkqtp_floattolatexqstr(jkqtpstatSumOfDeviations(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType::Linear, cA, cB)),3)));
plotter->addGraph(g);
if (coeffA) *coeffA=cA;
if (coeffB) *coeffB=cB;
@ -765,7 +765,7 @@ inline JKQTPXFunctionLineGraph* jkqtpstatAddRobustIRLSLinearRegression(JKQTBaseP
JKQTPXFunctionLineGraph* g=new JKQTPXFunctionLineGraph(plotter);
g->setSpecialFunction(JKQTPXFunctionLineGraph::SpecialFunction::Line);
g->setParamsV(cA, cB);
g->setTitle(QString("robust regression: $f(x) = %1 + %2 \\cdot x$").arg(jkqtp_floattolatexqstr(cA)).arg(jkqtp_floattolatexqstr(cB)));
g->setTitle(QString("robust regression: $f(x) = %1%2{\\cdot}x, \\chi^2=%4, R^2=%3$").arg(jkqtp_floattolatexqstr(cA, 2, true, 1e-16,1e-2, 1e4,false)).arg(jkqtp_floattolatexqstr(cB, 2, true, 1e-16,1e-2, 1e4,true)).arg(jkqtp_floattolatexqstr(jkqtpstatCoefficientOfDetermination(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType::Linear, cA, cB)),3)).arg(jkqtp_floattolatexqstr(jkqtpstatSumOfDeviations(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType::Linear, cA, cB)),3)));
plotter->addGraph(g);
if (coeffA) *coeffA=cA;
if (coeffB) *coeffB=cB;
@ -853,7 +853,7 @@ inline JKQTPXFunctionLineGraph* jkqtpstatAddLinearWeightedRegression(JKQTBasePlo
JKQTPXFunctionLineGraph* g=new JKQTPXFunctionLineGraph(plotter);
g->setSpecialFunction(JKQTPXFunctionLineGraph::SpecialFunction::Line);
g->setParamsV(cA, cB);
g->setTitle(QString("weighted regression: $f(x) = %1 + %2 \\cdot x$").arg(jkqtp_floattolatexqstr(cA)).arg(jkqtp_floattolatexqstr(cB)));
g->setTitle(QString("weighted regression: $f(x) = %1%2{\\cdot}x, \\chi^2=%4, R^2=%3$").arg(jkqtp_floattolatexqstr(cA, 2, true, 1e-16,1e-2, 1e4,false)).arg(jkqtp_floattolatexqstr(cB, 2, true, 1e-16,1e-2, 1e4,true)).arg(jkqtp_floattolatexqstr(jkqtpstatCoefficientOfDetermination(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType::Linear, cA, cB)),3)).arg(jkqtp_floattolatexqstr(jkqtpstatWeightedSumOfDeviations(firstX,lastX,firstY,lastY,firstW,lastW,jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType::Linear, cA, cB),fWeightDataToWi),3)));
plotter->addGraph(g);
if (coeffA) *coeffA=cA;
if (coeffB) *coeffB=cB;
@ -938,7 +938,7 @@ inline JKQTPXFunctionLineGraph* jkqtpstatAddRegression(JKQTBasePlotter* plotter,
jkqtpstatRegression(type, firstX, lastX, firstY, lastY, cA, cB, fixA, fixB);
JKQTPXFunctionLineGraph* g=new JKQTPXFunctionLineGraph(plotter);
g->setPlotFunctionFunctor(jkqtpStatGenerateRegressionModel(type, cA, cB));
g->setTitle(QString("regression: $%1$").arg(jkqtpstatRegressionModel2Latex(type, cA, cB)));
g->setTitle(QString("regression: $%1, \\chi^2=%3, R^2=%2$").arg(jkqtpstatRegressionModel2Latex(type, cA, cB)).arg(jkqtp_floattolatexqstr(jkqtpstatCoefficientOfDetermination(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(type, cA, cB)),3)).arg(jkqtp_floattolatexqstr(jkqtpstatSumOfDeviations(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(type, cA, cB)),3)));
plotter->addGraph(g);
if (coeffA) *coeffA=cA;
if (coeffB) *coeffB=cB;
@ -1014,7 +1014,7 @@ inline JKQTPXFunctionLineGraph* jkqtpstatAddRobustIRLSRegression(JKQTBasePlotter
jkqtpstatRobustIRLSRegression(type, firstX, lastX, firstY, lastY, cA, cB, fixA, fixB, p, iterations);
JKQTPXFunctionLineGraph* g=new JKQTPXFunctionLineGraph(plotter);
g->setPlotFunctionFunctor(jkqtpStatGenerateRegressionModel(type, cA, cB));
g->setTitle(QString("robust regression: $%1$").arg(jkqtpstatRegressionModel2Latex(type, cA, cB)));
g->setTitle(QString("robust regression: $%1, \\chi^2=%3, R^2=%2$").arg(jkqtpstatRegressionModel2Latex(type, cA, cB)).arg(jkqtp_floattolatexqstr(jkqtpstatCoefficientOfDetermination(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(type, cA, cB)),3)).arg(jkqtp_floattolatexqstr(jkqtpstatSumOfDeviations(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(type, cA, cB)),3)));
plotter->addGraph(g);
if (coeffA) *coeffA=cA;
if (coeffB) *coeffB=cB;
@ -1104,7 +1104,7 @@ inline JKQTPXFunctionLineGraph* jkqtpstatAddWeightedRegression(JKQTBasePlotter*
jkqtpstatWeightedRegression(type, firstX, lastX, firstY, lastY, firstW, lastW, cA, cB, fixA, fixB, fWeightDataToWi);
JKQTPXFunctionLineGraph* g=new JKQTPXFunctionLineGraph(plotter);
g->setPlotFunctionFunctor(jkqtpStatGenerateRegressionModel(type, cA, cB));
g->setTitle(QString("weighted regression: $%1$").arg(jkqtpstatRegressionModel2Latex(type, cA, cB)));
g->setTitle(QString("weighted regression: $%1, \\chi^2=%3, R^2=%2$").arg(jkqtpstatRegressionModel2Latex(type, cA, cB)).arg(jkqtp_floattolatexqstr(jkqtpstatCoefficientOfDetermination(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(type, cA, cB)),3)).arg(jkqtp_floattolatexqstr(jkqtpstatSumOfDeviations(firstX,lastX,firstY,lastY,jkqtpStatGenerateRegressionModel(type, cA, cB)),3)));
plotter->addGraph(g);
if (coeffA) *coeffA=cA;
if (coeffB) *coeffB=cB;
@ -1176,7 +1176,7 @@ inline JKQTPXFunctionLineGraph* jkqtpstatAddPolyFit(JKQTBasePlotter* plotter, In
JKQTPXFunctionLineGraph* gPoly=new JKQTPXFunctionLineGraph(plotter);
jkqtpstatPolyFit(firstX,lastX,firstY,lastY,P,std::back_inserter(pFit));
gPoly->setPlotFunctionFunctor(jkqtpstatGeneratePolynomialModel(pFit.begin(), pFit.end()));
gPoly->setTitle(QString("regression: $%1$").arg(jkqtpstatPolynomialModel2Latex(pFit.begin(), pFit.end())));
gPoly->setTitle(QString("regression: $%1, \\chi^2=%3, R^2=%2$").arg(jkqtpstatPolynomialModel2Latex(pFit.begin(), pFit.end())).arg(jkqtp_floattolatexqstr(jkqtpstatCoefficientOfDetermination(firstX,lastX,firstY,lastY,jkqtpstatGeneratePolynomialModel(pFit.begin(), pFit.end())),3)).arg(jkqtp_floattolatexqstr(jkqtpstatSumOfDeviations(firstX,lastX,firstY,lastY,jkqtpstatGeneratePolynomialModel(pFit.begin(), pFit.end())),3)));
std::copy(pFit.begin(), pFit.end(), firstRes);
plotter->addGraph(gPoly);
return gPoly;