split statistics library by topic into several .H/.CPP-pairs

This commit is contained in:
jkriege2 2019-06-12 12:58:47 +02:00
parent 8abb2492fa
commit 6d95c5c64c
13 changed files with 3158 additions and 2926 deletions

View File

@ -0,0 +1,46 @@
/*
Copyright (c) 2008-2019 Jan W. Krieger (<jan@jkrieger.de>)
last modification: $LastChangedDate$ (revision $Rev$)
This software is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (LGPL) as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License (LGPL) for more details.
You should have received a copy of the GNU Lesser General Public License (LGPL)
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "jkqtpstatbasics.h"
JKQTPStat5NumberStatistics::JKQTPStat5NumberStatistics():
minimum(JKQTP_DOUBLE_NAN),
minimumQuantile(0),
quantile1(JKQTP_DOUBLE_NAN),
quantile1Spec(0.25),
median(JKQTP_DOUBLE_NAN),
quantile2(JKQTP_DOUBLE_NAN),
quantile2Spec(0.75),
maximum(JKQTP_DOUBLE_NAN),
maximumQuantile(1),
N(0)
{}
double JKQTPStat5NumberStatistics::IQR() const {
return quantile2-quantile1;
}
double JKQTPStat5NumberStatistics::IQRSignificanceEstimate() const {
return 2.0*(1.58*(IQR()))/sqrt(static_cast<double>(N));
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,24 @@
/*
Copyright (c) 2008-2019 Jan W. Krieger (<jan@jkrieger.de>)
last modification: $LastChangedDate$ (revision $Rev$)
This software is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (LGPL) as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License (LGPL) for more details.
You should have received a copy of the GNU Lesser General Public License (LGPL)
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "jkqtpstathistogram.h"

View File

@ -0,0 +1,323 @@
/*
Copyright (c) 2008-2019 Jan W. Krieger (<jan@jkrieger.de>)
last modification: $LastChangedDate$ (revision $Rev$)
This software is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (LGPL) as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License (LGPL) for more details.
You should have received a copy of the GNU Lesser General Public License (LGPL)
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef JKQTPSTATHISTOGRAM_H_INCLUDED
#define JKQTPSTATHISTOGRAM_H_INCLUDED
#include <stdint.h>
#include <cmath>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <stdio.h>
#include <limits>
#include <vector>
#include <utility>
#include <cfloat>
#include <ostream>
#include <iomanip>
#include <sstream>
#include "jkqtcommon/jkqtp_imexport.h"
#include "jkqtcommon/jkqtplinalgtools.h"
#include "jkqtcommon/jkqtparraytools.h"
#include "jkqtcommon/jkqtpdebuggingtools.h"
#include "jkqtcommon/jkqtpstatbasics.h"
/*! \brief defines where the returned x-coordinates (in histogramXOut) lie inside a histogram bin
\ingroup jkqtptools_math_statistics_1dhist
\see jkqtpstatHistogram()
*/
enum class JKQTPStatHistogramBinXMode {
XIsLeft, /*!< \brief x-location is the left edge of the bin */
XIsMid, /*!< \brief x-location is the middle of the bin */
XIsRight /*!< \brief x-location is the right edge of the bin */
};
/*! \brief calculate an autoranged 1-dimensional histogram from the given data range \a first ... \a last, bins defined by their number
\ingroup jkqtptools_math_statistics_1dhist
\tparam InputIt standard iterator type of \a first and \a last.
\tparam OutputIt standard output iterator type used for the outliers output \a histogramXOut and \a histogramYOut, use e.g. std::back_inserter
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\param[out] histogramXOut output iterator that receives x-positions of the histogram bins. Location of this value inside the bin range is defined by \a binXMode
\param[out] histogramYOut output iterator that receives counts/frequencies of the histogram bins
\param bins number of bins in the output histogram
\param normalized indicates whether the histogram has to be normalized
\param cummulative if \c true, a cummulative histogram is calculated
\param binXMode defines where the returned x-coordinates (in histogramXOut) lie inside the histogram bin (see JKQTPStatHistogramBinXMode)
\see jkqtpstatAddHHistogram1DAutoranged()
*/
template <class InputIt, class OutputIt>
inline void jkqtpstatHistogram1DAutoranged(InputIt first, InputIt last, OutputIt histogramXOut, OutputIt histogramYOut, int bins=11, bool normalized=true, bool cummulative=false, JKQTPStatHistogramBinXMode binXMode=JKQTPStatHistogramBinXMode::XIsLeft) {
double minV=0, maxV=0;
size_t N=0;
jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
std::vector<double> histX;
std::vector<double> histY;
const double range=maxV-minV;
const double binw=range/static_cast<double>(bins);
// initialize the histogram
for (int i=0; i<bins; i++) {
histX.push_back(minV+static_cast<double>(i)*binw);
histY.push_back(0);
}
// calculate the histogram
for (auto it=first; it!=last; ++it) {
const double v=jkqtp_todouble(*it);
if (JKQTPIsOKFloat(v)) {
size_t b=jkqtp_bounded<size_t>(0, static_cast<size_t>(floor((v-minV)/binw)), bins-1);
histY[b]++;
}
}
// output the histogram
double xoffset=0;
if (binXMode==JKQTPStatHistogramBinXMode::XIsRight) xoffset=binw;
if (binXMode==JKQTPStatHistogramBinXMode::XIsMid) xoffset=binw/2.0;
double NNorm=1;
if (normalized) {
NNorm=static_cast<double>(N);
}
double h=0;
for (size_t i=0; i<histX.size(); i++) {
*++histogramXOut=histX[i]+xoffset;
if (cummulative) h+=(histY[i]/NNorm);
else h=histY[i]/NNorm;
*++histogramYOut=h;
}
}
/*! \brief calculate an autoranged 1-dimensional histogram from the given data range \a first ... \a last, bins defined by their width
\ingroup jkqtptools_math_statistics_1dhist
\tparam InputIt standard iterator type of \a first and \a last.
\tparam OutputIt standard output iterator type used for the outliers output \a histogramXOut and \a histogramYOut, use e.g. std::back_inserter
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\param[out] histogramXOut output iterator that receives x-positions of the histogram bins. Location of this value inside the bin range is defined by \a binXMode
\param[out] histogramYOut output iterator that receives counts/frequencies of the histogram bins
\param binWidth width of the bins
\param normalized indicates whether the histogram has to be normalized
\param cummulative if \c true, a cummulative histogram is calculated
\param binXMode defines where the returned x-coordinates (in histogramXOut) lie inside the histogram bin (see JKQTPStatHistogramBinXMode)
\see jkqtpstatAddHHistogram1DAutoranged()
*/
template <class InputIt, class OutputIt>
inline void jkqtpstatHistogram1DAutoranged(InputIt first, InputIt last, OutputIt histogramXOut, OutputIt histogramYOut, double binWidth, bool normalized=true, bool cummulative=false, JKQTPStatHistogramBinXMode binXMode=JKQTPStatHistogramBinXMode::XIsLeft) {
double minV=0, maxV=0;
size_t N=0;
jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
std::vector<double> histX;
std::vector<double> histY;
const double range=maxV-minV;
const double binw=binWidth;
const int bins=static_cast<int>(ceil(range/binWidth));
// initialize the histogram
for (int i=0; i<bins; i++) {
histX.push_back(minV+static_cast<double>(i)*binw);
histY.push_back(0);
}
// calculate the histogram
for (auto it=first; it!=last; ++it) {
const double v=jkqtp_todouble(*it);
if (JKQTPIsOKFloat(v)) {
size_t b=jkqtp_bounded<size_t>(0, static_cast<size_t>(floor((v-minV)/binw)), bins-1);
histY[b]++;
}
}
// output the histogram
double xoffset=0;
if (binXMode==JKQTPStatHistogramBinXMode::XIsRight) xoffset=binw;
if (binXMode==JKQTPStatHistogramBinXMode::XIsMid) xoffset=binw/2.0;
double NNorm=1;
if (normalized) {
NNorm=static_cast<double>(N);
}
double h=0;
for (size_t i=0; i<histX.size(); i++) {
*++histogramXOut=histX[i]+xoffset;
if (cummulative) h+=(histY[i]/NNorm);
else h=histY[i]/NNorm;
*++histogramYOut=h;
}
}
/*! \brief calculate an autoranged 1-dimensional histogram from the given data range \a first ... \a last, bins defined the range \a binsFirst ... \a binsLast
\ingroup jkqtptools_math_statistics_1dhist
\tparam InputIt standard iterator type of \a first and \a last.
\tparam BinsInputIt standard iterator type of \a binsFirst and \a binsLast.
\tparam OutputIt standard output iterator type used for the outliers output \a histogramXOut and \a histogramYOut, use e.g. std::back_inserter
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\param binsFirst iterator pointing to the first item in the set of histogram bins
\param binsLast iterator pointing behind the last item in the set of histogram bins
\param[out] histogramXOut output iterator that receives x-positions of the histogram bins. Location of this value inside the bin range is defined by \a binXMode
\param[out] histogramYOut output iterator that receives counts/frequencies of the histogram bins
\param normalized indicates whether the histogram has to be normalized
\param cummulative if \c true, a cummulative histogram is calculated
\param binXMode defines where the returned x-coordinates (in histogramXOut) lie inside the histogram bin (see JKQTPStatHistogramBinXMode)
\see jkqtpstatAddHHistogram1D()
*/
template <class InputIt, class BinsInputIt, class OutputIt>
inline void jkqtpstatHistogram1D(InputIt first, InputIt last, BinsInputIt binsFirst, BinsInputIt binsLast, OutputIt histogramXOut, OutputIt histogramYOut, bool normalized=true, bool cummulative=false, JKQTPStatHistogramBinXMode binXMode=JKQTPStatHistogramBinXMode::XIsLeft) {
double minV=0, maxV=0;
size_t N=0;
jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
std::vector<double> histX;
std::vector<double> histY;
// initialize the histogram
for (auto it=binsFirst; it!=binsLast; ++it) {
histX.push_back(jkqtp_todouble(*it));
histY.push_back(0);
}
std::sort(histX.begin(), histX.end());
// calculate the histogram
for (auto it=first; it!=last; ++it) {
const double v=jkqtp_todouble(*it);
if (JKQTPIsOKFloat(v)) {
auto itb=std::lower_bound(histX.begin(), histX.end(), v);
size_t bin=jkqtp_bounded<size_t>(0,static_cast<size_t>(abs(std::distance(histX.begin(), itb))), histY.size()-1);
histY[bin]++;
}
}
// output the histogram
double NNorm=1;
if (normalized) {
NNorm=static_cast<double>(N);
}
double h=0;
for (size_t i=0; i<histX.size(); i++) {
double xoffset=0;
double binw=1;
if (binXMode!=JKQTPStatHistogramBinXMode::XIsLeft) {
if (i==0 && i+1<histX.size()) binw=histX[1]-histX[0];
else if (i==histX.size()-1 && static_cast<int>(i)-1>0) binw=histX[histX.size()-1]-histX[histX.size()-2];
else if (i<histX.size() && i+1<histX.size()) binw=histX[i+1]-histX[i];
if (binXMode==JKQTPStatHistogramBinXMode::XIsRight) xoffset=binw;
if (binXMode==JKQTPStatHistogramBinXMode::XIsMid) xoffset=binw/2.0;
}
*++histogramXOut=histX[i]+xoffset;
if (cummulative) h+=(histY[i]/NNorm);
else h=histY[i]/NNorm;
*++histogramYOut=h;
}
}
/*! \brief calculate a 2-dimensional histogram from the given data range \a firstX / \a firstY ... \a lastY / \a lastY
\ingroup jkqtptools_math_statistics_2dhist
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\tparam OutputIt standard output iterator type used for the outliers output \a histogramXOut and \a histogramYOut, use e.g. std::back_inserter
\param firstX iterator pointing to the first x-position item in the dataset to use \f$ X_1 \f$
\param lastX iterator pointing behind the last x-position item in the dataset to use \f$ X_N \f$
\param firstY iterator pointing to the first y-position item in the dataset to use \f$ Y_1 \f$
\param lastY iterator pointing behind the last y-position item in the dataset to use \f$ Y_N \f$
\param[out] histogramImgOut output iterator that receives counts of the histogram bins in row-major ordering
\param xmin position of the first histogram bin in x-direction
\param xmax position of the last histogram bin in x-direction
\param ymin position of the first histogram bin in y-direction
\param ymax position of the last histogram bin in y-direction
\param xbins number of bins in x-direction (i.e. width of the output histogram \a histogramImgOut )
\param ybins number of bins in y-direction (i.e. height of the output histogram \a histogramImgOut )
\param normalized indicates whether the histogram has to be normalized
\see jkqtpstatAddHHistogram1DAutoranged()
*/
template <class InputItX, class InputItY, class OutputIt>
inline void jkqtpstatHistogram2D(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, OutputIt histogramImgOut, double xmin, double xmax, double ymin, double ymax, size_t xbins=10, size_t ybins=10, bool normalized=true) {
const double binwx=fabs(xmax-xmin)/static_cast<double>(xbins);
const double binwy=fabs(ymax-ymin)/static_cast<double>(ybins);
std::vector<double> hist;
std::fill_n(std::back_inserter(hist), xbins*ybins, 0.0);
// calculate the histogram
auto itX=firstX;
auto itY=firstY;
size_t N=0;
for (; (itX!=lastX) && (itY!=lastY); ++itX, ++itY) {
const double vx=jkqtp_todouble(*itX);
const double vy=jkqtp_todouble(*itY);
if (JKQTPIsOKFloat(vx) && JKQTPIsOKFloat(vy)) {
const size_t bx=jkqtp_bounded<size_t>(0, static_cast<size_t>(floor((vx-xmin)/binwx)), xbins-1);
const size_t by=jkqtp_bounded<size_t>(0, static_cast<size_t>(floor((vy-ymin)/binwy)), ybins-1);
hist[by*xbins+bx]++;
N++;
}
}
// output the histogram
double NNorm=1;
if (normalized) {
NNorm=static_cast<double>(N);
}
std::transform(hist.begin(), hist.end(), histogramImgOut, [NNorm](double v) { return v/NNorm; });
}
#endif // JKQTPSTATHISTOGRAM_H_INCLUDED

View File

@ -20,151 +20,3 @@
#include "jkqtpstatisticstools.h"
double jkqtpstatKernel1DGaussian(double t) {
return exp(-0.5*t*t)/JKQTPSTATISTICS_SQRT_2PI;
}
double jkqtpstatKernel1DCauchy(double t) {
return 1.0/(M_PI*(1.0+t*t));
}
double jkqtpstatKernel1DPicard(double t) {
return exp(-0.5*fabs(t))/2.0;
}
double jkqtpstatKernel1DEpanechnikov(double t) {
return (fabs(t)<1.0)?(0.75*(1.0-t*t)):0.0;
}
double jkqtpstatKernel1DUniform(double t) {
return (fabs(t)<=1.0)?0.5:0.0;
}
double jkqtpstatKernel1DTriangle(double t) {
return (fabs(t)<=1.0)?(1.0-fabs(t)):0.0;
}
double jkqtpstatKernel1DQuartic(double t) {
return (fabs(t)<=1.0)?(15.0/16.0*jkqtp_sqr(1.0-t*t)):0.0;
}
double jkqtpstatKernel1DTriweight(double t) {
return (fabs(t)<1.0)?(35.0/32.0*jkqtp_cube(1.0-t*t)):0.0;
}
double jkqtpstatKernel1DTricube(double t) {
return (fabs(t)<1.0)?(70.0/81.0*jkqtp_cube(1.0-jkqtp_cube(fabs(t)))):0.0;
}
double jkqtpstatKernel1DCosine(double t) {
return (fabs(t)<1.0)?(M_PI/4.0*cos(t*M_PI/2.0)):0.0;
}
double jkqtpstatKernel2DGaussian(double tx, double ty)
{
return exp(-0.5*(tx*tx+ty*ty))/(2.0*M_PI);
}
double jkqtpstatKernel2DUniform(double tx, double ty) {
return (fabs(tx)<1.0 && fabs(ty)<=1.0)?0.25:0.0;
}
JKQTPStat5NumberStatistics::JKQTPStat5NumberStatistics():
minimum(JKQTP_DOUBLE_NAN),
minimumQuantile(0),
quantile1(JKQTP_DOUBLE_NAN),
quantile1Spec(0.25),
median(JKQTP_DOUBLE_NAN),
quantile2(JKQTP_DOUBLE_NAN),
quantile2Spec(0.75),
maximum(JKQTP_DOUBLE_NAN),
maximumQuantile(1),
N(0)
{}
double JKQTPStat5NumberStatistics::IQR() const {
return quantile2-quantile1;
}
double JKQTPStat5NumberStatistics::IQRSignificanceEstimate() const {
return 2.0*(1.58*(IQR()))/sqrt(static_cast<double>(N));
}
std::function<double (double, double, double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type) {
switch(type) {
case JKQTPStatRegressionModelType::Linear: return [](double x, double a, double b)->double { return a+b*x; };
case JKQTPStatRegressionModelType::PowerLaw: return [](double x, double a, double b)->double { return a*pow(x,b); };
case JKQTPStatRegressionModelType::Exponential: return [](double x, double a, double b)->double { return a*exp(b*x); };
case JKQTPStatRegressionModelType::Logarithm: return [](double x, double a, double b)->double { return a+b*log(x); };
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateRegressionModel()");
}
QString jkqtpstatRegressionModel2Latex(JKQTPStatRegressionModelType type, double a, double b) {
switch(type) {
case JKQTPStatRegressionModelType::Linear: return QString("f(x)=%1%2{\\cdot}x").arg(jkqtp_floattolatexqstr(a, 2, true, 1e-16,1e-2, 1e4,false)).arg(jkqtp_floattolatexqstr(b, 2, true, 1e-16,1e-2, 1e4,true));
case JKQTPStatRegressionModelType::PowerLaw: return QString("f(x)=%1{\\cdot}x^{%2}").arg(jkqtp_floattolatexqstr(a, 3)).arg(jkqtp_floattolatexqstr(b, 3));
case JKQTPStatRegressionModelType::Exponential: return QString("f(x)=%1{\\cdot}\\exp(%2{\\cdot}x)").arg(jkqtp_floattolatexqstr(a, 3)).arg(jkqtp_floattolatexqstr(b, 3));
case JKQTPStatRegressionModelType::Logarithm: return QString("f(x)=%1%2{\\cdot}\\ln(x)").arg(jkqtp_floattolatexqstr(a, 2, true, 1e-16,1e-2, 1e4,false)).arg(jkqtp_floattolatexqstr(b, 2, true, 1e-16,1e-2, 1e4,true));
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpstatRegressionModel2Latex()");
}
std::function<double (double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type, double a, double b) {
auto res=jkqtpStatGenerateRegressionModel(type);
return std::bind(res, std::placeholders::_1, a, b);
}
std::pair<std::function<double (double)>, std::function<double (double)> > jkqtpStatGenerateTransformation(JKQTPStatRegressionModelType type) {
auto logF=[](double x)->double { return log(x); };
//auto expF=[](double x)->double { return exp(x); };
auto idF=&jkqtp_identity<double>;
switch(type) {
case JKQTPStatRegressionModelType::Linear: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::PowerLaw: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, logF);
case JKQTPStatRegressionModelType::Exponential: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, logF);
case JKQTPStatRegressionModelType::Logarithm: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, idF);
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateTransformation()");
}
std::pair<std::function<double (double)>, std::function<double (double)> > jkqtpStatGenerateParameterATransformation(JKQTPStatRegressionModelType type) {
auto logF=[](double x)->double { return log(x); };
auto expF=[](double x)->double { return exp(x); };
auto idF=&jkqtp_identity<double>;
switch(type) {
case JKQTPStatRegressionModelType::Linear: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::PowerLaw: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, expF);
case JKQTPStatRegressionModelType::Exponential: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, expF);
case JKQTPStatRegressionModelType::Logarithm: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateParameterATransformation()");
}
std::pair<std::function<double (double)>, std::function<double (double)> > jkqtpStatGenerateParameterBTransformation(JKQTPStatRegressionModelType type) {
//auto logF=[](double x)->double { return log(x); };
//auto expF=[](double x)->double { return exp(x); };
auto idF=&jkqtp_identity<double>;
switch(type) {
case JKQTPStatRegressionModelType::Linear: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::PowerLaw: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::Exponential: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::Logarithm: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateParameterBTransformation()");
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,85 @@
/*
Copyright (c) 2008-2019 Jan W. Krieger (<jan@jkrieger.de>)
last modification: $LastChangedDate$ (revision $Rev$)
This software is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (LGPL) as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License (LGPL) for more details.
You should have received a copy of the GNU Lesser General Public License (LGPL)
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "jkqtpstatkde.h"
double jkqtpstatKernel1DGaussian(double t) {
return exp(-0.5*t*t)/JKQTPSTATISTICS_SQRT_2PI;
}
double jkqtpstatKernel1DCauchy(double t) {
return 1.0/(M_PI*(1.0+t*t));
}
double jkqtpstatKernel1DPicard(double t) {
return exp(-0.5*fabs(t))/2.0;
}
double jkqtpstatKernel1DEpanechnikov(double t) {
return (fabs(t)<1.0)?(0.75*(1.0-t*t)):0.0;
}
double jkqtpstatKernel1DUniform(double t) {
return (fabs(t)<=1.0)?0.5:0.0;
}
double jkqtpstatKernel1DTriangle(double t) {
return (fabs(t)<=1.0)?(1.0-fabs(t)):0.0;
}
double jkqtpstatKernel1DQuartic(double t) {
return (fabs(t)<=1.0)?(15.0/16.0*jkqtp_sqr(1.0-t*t)):0.0;
}
double jkqtpstatKernel1DTriweight(double t) {
return (fabs(t)<1.0)?(35.0/32.0*jkqtp_cube(1.0-t*t)):0.0;
}
double jkqtpstatKernel1DTricube(double t) {
return (fabs(t)<1.0)?(70.0/81.0*jkqtp_cube(1.0-jkqtp_cube(fabs(t)))):0.0;
}
double jkqtpstatKernel1DCosine(double t) {
return (fabs(t)<1.0)?(M_PI/4.0*cos(t*M_PI/2.0)):0.0;
}
double jkqtpstatKernel2DGaussian(double tx, double ty)
{
return exp(-0.5*(tx*tx+ty*ty))/(2.0*M_PI);
}
double jkqtpstatKernel2DUniform(double tx, double ty) {
return (fabs(tx)<1.0 && fabs(ty)<=1.0)?0.25:0.0;
}

View File

@ -0,0 +1,537 @@
/*
Copyright (c) 2008-2019 Jan W. Krieger (<jan@jkrieger.de>)
last modification: $LastChangedDate$ (revision $Rev$)
This software is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (LGPL) as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License (LGPL) for more details.
You should have received a copy of the GNU Lesser General Public License (LGPL)
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef JKQTPSTATKDE_H_INCLUDED
#define JKQTPSTATKDE_H_INCLUDED
#include <stdint.h>
#include <cmath>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <stdio.h>
#include <limits>
#include <vector>
#include <utility>
#include <cfloat>
#include <ostream>
#include <iomanip>
#include <sstream>
#include "jkqtcommon/jkqtp_imexport.h"
#include "jkqtcommon/jkqtplinalgtools.h"
#include "jkqtcommon/jkqtparraytools.h"
#include "jkqtcommon/jkqtpdebuggingtools.h"
#include "jkqtcommon/jkqtpstatbasics.h"
/*! \brief a 1D Gaussian kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t):=\frac{1}{\sqrt{2\pi}}\exp \left(-\frac{1}{2}t^2\right) \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DGaussian(double t);
/*! \brief a 1D Cauchy kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t):=\frac{1}{\pi(1+t^2)} \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DCauchy(double t);
/*! \brief a 1D Picard kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t):=\frac{1}{2}\exp(-|t|) \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DPicard(double t);
/*! \brief a 1D Epanechnikov kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t) :=\begin{cases}\frac{3}{4} ( 1- t^2 ), & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DEpanechnikov(double t);
/*! \brief a 1D uniform kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t) :=\begin{cases}0.5, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DUniform(double t);
/*! \brief a 1D Epanechnikov kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t) :=\begin{cases}1-|t|, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DTriangle(double t);
/*! \brief a 1D quartic kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t) :=\begin{cases}\frac{15}{16}(1-t^2)^2, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DQuartic(double t);
/*! \brief a 1D triweight kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t) :=\begin{cases}\frac{35}{32}(1-t^2)^3, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DTriweight(double t);
/*! \brief a 1D tricube kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t) :=\begin{cases}\frac{70}{81}(1-|t|^3)^3, & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DTricube(double t);
/*! \brief a 1D cosine kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_1dkde_kernels
\f[ k(t) :=\begin{cases}\frac{\pi}{4}\cos\left(\frac{\pi}{2}t\right), & \text{if }t\in [-1;1]\\0, & \text{else}\end{cases} \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel1DCosine(double t);
/*! \brief a 1D Gaussian kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_2dkde_kernels
\f[ k(t_x, t_y):=\frac{1}{2\pi}\exp \left(-\frac{t_x^2+t_y^2}{2}\right) \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel2DGaussian(double tx, double ty);
/*! \brief a 1D Gaussian kernel function, e.g. for Kernel Density Estimation
\ingroup jkqtptools_math_statistics_2dkde_kernels
\f[ k(t_x, t_y):=\begin{cases}\frac{1}{4}, & \text{if }t_x,t_y\in [-1;1]\\0, & \text{else}\end{cases} \f]
*/
JKQTP_LIB_EXPORT double jkqtpstatKernel2DUniform(double tx, double ty);
/*! \brief estimates a bandwidth for a Kernel Density Estimator (KDE) of the given data \a first ... \a last
\ingroup jkqtptools_math_statistics_1dkde
evaluates \f[ h = \left(\frac{4\hat{\sigma}^5}{3n}\right)^{\frac{1}{5}} \approx 1.06 \hat{\sigma} n^{-1/5} \f]
\tparam InputIt standard iterator type of \a first and \a last.
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\return the estimated bandwidth
*/
template <class InputIt>
inline double jkqtpstatEstimateKDEBandwidth(InputIt first, InputIt last) {
size_t N=0;
const double sigma=jkqtpstatStdDev(first, last, nullptr, &N);
return 1.06*sigma/pow(static_cast<double>(N), 1.0/5.0);
}
/*! \brief evaluates the Kernel Density Estimator (KDE) at a given position
\ingroup jkqtptools_math_statistics_1dkde
evaluates \f[ \tilde{f}(t):=\frac{1}{N\cdot\text{bandwidth}}\cdot\sum\limits_{i=0}^{N-1}K\left(\frac{t-x_i}{\text{bandwidth}}\right) \f]
\tparam InputIt standard iterator type of \a first and \a last.
\param t where to evaluate the kernel sum
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
\param bandwidth bandwidth used for the KDE
*/
template <class InputIt>
inline double jkqtpstatEvaluateKernelSum(double t, InputIt first, InputIt last, const std::function<double(double)>& kernel, double bandwidth) {
double res=0;
size_t cnt=0;
for (auto it=first; it!=last; ++it) {
const double v=jkqtp_todouble(*it);
if (JKQTPIsOKFloat(v)) {
const double vx=(t-v)/bandwidth;
res+=kernel(vx);
cnt++;
}
}
if (cnt==0) return 0.0;
return res/static_cast<double>(cnt)/bandwidth;
}
/*! \brief calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range \a first ... \a last, bins defined by their number
\ingroup jkqtptools_math_statistics_1dkde
\tparam InputIt standard iterator type of \a first and \a last.
\tparam OutputIt standard output iterator type used for the outliers output \a KDEXOut and \a KDEYOut, use e.g. std::back_inserter
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
\param bandwidth bandwidth used for the KDE
\param[out] KDEXOut output iterator that receives x-positions of the KDE bins. Location of this value inside the bin range is defined by \a binXMode
\param[out] KDEYOut output iterator that receives counts/frequencies of the KDE bins
\param Nout number datapoints in the output KDE
\param cummulative if \c true, a cummulative KDE is calculated
This function performs <a href="https://en.wikipedia.org/wiki/Kernel_density_estimation">Kernel Density Estimation</a> for a given data array.
Then the resulting density is evaluated on a regular grid spanning [min(X)...max(X)] with bins datapoints in between.
\warning this functions is getting very slow for large dataset, as for each point in the resulting histogram N kernel functions have to be evaluated.
\see https://en.wikipedia.org/wiki/Kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
*/
template <class InputIt, class OutputIt>
inline void jkqtpstatKDE1DAutoranged(InputIt first, InputIt last, OutputIt KDEXOut, OutputIt KDEYOut, int Nout=100, const std::function<double(double)>& kernel=std::function<double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false) {
double minV=0, maxV=0;
size_t N=0;
jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
std::vector<double> histX;
std::vector<double> histY;
const double range=maxV-minV;
const double binw=range/static_cast<double>(Nout);
// calculate the KDE
for (int i=0; i<Nout; i++) {
const double x=minV+static_cast<double>(i)*binw+binw/2.0;
histX.push_back(x);
histY.push_back(jkqtpstatEvaluateKernelSum(x, first, last, kernel, bandwidth));
}
// output the KDE
double h=0;
for (size_t i=0; i<histX.size(); i++) {
*++KDEXOut=histX[i];
if (cummulative) h+=histY[i];
else h=histY[i];
*++KDEYOut=h;
}
}
/*! \brief calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range \a first ... \a last, bins defined by their number
\ingroup jkqtptools_math_statistics_1dkde
\tparam InputIt standard iterator type of \a first and \a last.
\tparam OutputIt standard output iterator type used for the outliers output \a KDEXOut and \a KDEYOut, use e.g. std::back_inserter
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
\param bandwidth bandwidth used for the KDE
\param[out] KDEXOut output iterator that receives x-positions of the KDE bins. Location of this value inside the bin range is defined by \a binXMode
\param[out] KDEYOut output iterator that receives counts/frequencies of the KDE bins
\param binWidth width of the bins
\param cummulative if \c true, a cummulative KDE is calculated
This function performs <a href="https://en.wikipedia.org/wiki/Kernel_density_estimation">Kernel Density Estimation</a> for a given data array.
Then the resulting density is evaluated on a regular grid spanning [min(X)...max(X)] with bins datapoints in between.
\warning this functions is getting very slow for large dataset, as for each point in the resulting histogram N kernel functions have to be evaluated.
\see https://en.wikipedia.org/wiki/Kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
*/
template <class InputIt, class OutputIt>
inline void jkqtpstatKDE1DAutoranged(InputIt first, InputIt last, OutputIt KDEXOut, OutputIt KDEYOut, double binWidth, const std::function<double(double)>& kernel=std::function<double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false) {
double minV=0, maxV=0;
size_t N=0;
jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
std::vector<double> histX;
std::vector<double> histY;
const double range=maxV-minV;
const double binw=binWidth;
const int Nout=static_cast<int>(ceil(range/binWidth));
// calculate the KDE
for (int i=0; i<Nout; i++) {
const double xi=minV+static_cast<double>(i)*binw+binw/2.0;
histX.push_back(xi);
histY.push_back(jkqtpstatEvaluateKernelSum(xi, first, last, kernel, bandwidth));
}
// output the KDE
double h=0;
for (size_t i=0; i<histX.size(); i++) {
*++KDEXOut=histX[i];
if (cummulative) h+=histY[i];
else h=histY[i];
*++KDEYOut=h;
}
}
/*! \brief calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range \a first ... \a last, bins defined the range \a binsFirst ... \a binsLast
\ingroup jkqtptools_math_statistics_1dkde
\tparam InputIt standard iterator type of \a first and \a last.
\tparam BinsInputIt standard iterator type of \a binsFirst and \a binsLast.
\tparam OutputIt standard output iterator type used for the outliers output \a KDEXOut and \a KDEYOut, use e.g. std::back_inserter
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\param binsFirst iterator pointing to the first item in the set of KDE bins
\param binsLast iterator pointing behind the last item in the set of KDE bins
\param[out] KDEXOut output iterator that receives x-positions of the KDE bins. Location of this value inside the bin range is defined by \a binXMode
\param[out] KDEYOut output iterator that receives counts/frequencies of the KDE bins
\param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
\param bandwidth bandwidth used for the KDE
\param cummulative if \c true, a cummulative KDE is calculated
\see https://en.wikipedia.org/wiki/Kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
*/
template <class InputIt, class BinsInputIt, class OutputIt>
inline void jkqtpstatKDE1D(InputIt first, InputIt last, BinsInputIt binsFirst, BinsInputIt binsLast, OutputIt KDEXOut, OutputIt KDEYOut, const std::function<double(double)>& kernel=std::function<double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false) {
double minV=0, maxV=0;
size_t N=0;
jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
std::vector<double> histX;
std::vector<double> histY;
// initialize the KDE
for (auto it=binsFirst; it!=binsLast; ++it) {
histX.push_back(jkqtp_todouble(*it));
}
std::sort(histX.begin(), histX.end());
// calculate the KDE
for (auto it=histX.begin(); it!=histX.end(); ++it) {
histY.push_back(jkqtpstatEvaluateKernelSum(*it, first, last, kernel, bandwidth));
}
// output the KDE
double h=0;
for (size_t i=0; i<histX.size(); i++) {
*++KDEXOut=histX[i];
if (cummulative) h+=histY[i];
else h=histY[i];
*++KDEYOut=h;
}
}
/*! \brief calculate an autoranged 1-dimensional Kernel Density Estimation (KDE) from the given data range \a first ... \a last, evaluation positions are given by the range \a binXLeft ... \a binXRight (in steps of \a binxDelta )
\ingroup jkqtptools_math_statistics_1dkde
\tparam InputIt standard iterator type of \a first and \a last.
\tparam OutputIt standard output iterator type used for the outliers output \a KDEXOut and \a KDEYOut, use e.g. std::back_inserter
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\param binXLeft first x-position, where to evaluate the KDE
\param binXDelta distance between two x-positions at which the KDE is evaluated
\param binXRight last x-position, where to evaluate the KDE
\param[out] KDEXOut output iterator that receives x-positions of the KDE bins. Location of this value inside the bin range is defined by \a binXMode
\param[out] KDEYOut output iterator that receives counts/frequencies of the KDE bins
\param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
\param bandwidth bandwidth used for the KDE
\param cummulative if \c true, a cummulative KDE is calculated
\see https://en.wikipedia.org/wiki/Kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
*/
template <class InputIt, class OutputIt>
inline void jkqtpstatKDE1D(InputIt first, InputIt last, double binXLeft, double binXDelta, double binXRight, OutputIt KDEXOut, OutputIt KDEYOut, const std::function<double(double)>& kernel=std::function<double(double)>(&jkqtpstatKernel1DGaussian), double bandwidth=1.0, bool cummulative=false) {
double minV=0, maxV=0;
size_t N=0;
jkqtpstatMinMax<InputIt>(first, last, minV, maxV, nullptr, nullptr, &N);
std::vector<double> histX;
std::vector<double> histY;
// calculate the KDE
for (double x=binXLeft; x<=binXRight; x+=binXDelta) {
histX.push_back(x);
histY.push_back(jkqtpstatEvaluateKernelSum(x, first, last, kernel, bandwidth));
}
// output the KDE
double h=0;
for (size_t i=0; i<histX.size(); i++) {
*++KDEXOut=histX[i];
if (cummulative) h+=histY[i];
else h=histY[i];
*++KDEYOut=h;
}
}
/*! \brief evaluates the Kernel Density Estimator (KDE) at a given position
\ingroup jkqtptools_math_statistics_1dkde
evaluates \f[ \tilde{f}(x,y):=\frac{1}{N\cdot\sqrt{\text{bandwidthx}}\cdot\sqrt{\text{bandwidthy}}}\cdot\sum\limits_{i=0}^{N-1}K\left(\frac{x-x_i}{\text{bandwidthx}},\frac{y-y_i}{\text{bandwidthy}}\right) \f]
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\param x where to evaluate the kernel sum, x-coordinate
\param y where to evaluate the kernel sum, y-coordinate
\param firstX iterator pointing to the first x-position item in the dataset to use \f$ X_1 \f$
\param lastX iterator pointing behind the last x-position item in the dataset to use \f$ X_N \f$
\param firstY iterator pointing to the first y-position item in the dataset to use \f$ Y_1 \f$
\param lastY iterator pointing behind the last y-position item in the dataset to use \f$ Y_N \f$
\param kernel the kernel function to use (e.g. jkqtpstatKernel1DGaussian() )
\param kernel the kernel function to use (e.g. jkqtpstatKernel2DGaussian() )
\param bandwidthX x-bandwidth used for the KDE
\param bandwidthY y-bandwidth used for the KDE
*/
template <class InputItX, class InputItY>
inline double jkqtpstatEvaluateKernelSum2D(double x, double y, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, const std::function<double(double,double)>& kernel, double bandwidthX, double bandwidthY) {
double res=0;
size_t cnt=0;
auto itX=firstX;
auto itY=firstY;
for (; (itX!=lastX)&&(itY!=lastY); ++itX, ++itY) {
const double vx=jkqtp_todouble(*itX);
const double vy=jkqtp_todouble(*itY);
if (JKQTPIsOKFloat(vx) && JKQTPIsOKFloat(vy)) {
const double vvx=(x-vx)/bandwidthX;
const double vvy=(y-vy)/bandwidthY;
res+=kernel(vvx,vvy);
cnt++;
}
}
if (cnt==0) return 0.0;
return res/static_cast<double>(cnt)/sqrt(bandwidthX*bandwidthY);
}
/*! \brief calculate an autoranged 2-dimensional Kernel Density Estimation (KDE) from the given data range \a firstX / \a firstY ... \a lastY / \a lastY
\ingroup jkqtptools_math_statistics_2dkde
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\tparam OutputIt standard output iterator type used for the outliers output \a histogramXOut and \a histogramYOut, use e.g. std::back_inserter
\param firstX iterator pointing to the first x-position item in the dataset to use \f$ X_1 \f$
\param lastX iterator pointing behind the last x-position item in the dataset to use \f$ X_N \f$
\param firstY iterator pointing to the first y-position item in the dataset to use \f$ Y_1 \f$
\param lastY iterator pointing behind the last y-position item in the dataset to use \f$ Y_N \f$
\param[out] histogramImgOut output iterator that receives counts of the histogram bins in row-major ordering
\param xmin position of the first histogram bin in x-direction
\param xmax position of the last histogram bin in x-direction
\param ymin position of the first histogram bin in y-direction
\param ymax position of the last histogram bin in y-direction
\param xbins number of bins in x-direction (i.e. width of the output histogram \a histogramImgOut )
\param ybins number of bins in y-direction (i.e. height of the output histogram \a histogramImgOut )
\param kernel the kernel function to use (e.g. jkqtpstatKernel2DGaussian() )
\param bandwidthX x-bandwidth used for the KDE
\param bandwidthY y-bandwidth used for the KDE
\see https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation, \ref JKQTPlotterBasicJKQTPDatastoreStatistics
*/
template <class InputItX, class InputItY, class OutputIt>
inline void jkqtpstatKDE2D(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, OutputIt histogramImgOut, double xmin, double xmax, double ymin, double ymax, size_t xbins, size_t ybins, const std::function<double(double,double)>& kernel=std::function<double(double,double)>(&jkqtpstatKernel2DGaussian), double bandwidthX=1.0, double bandwidthY=1.0) {
const double binwx=fabs(xmax-xmin)/static_cast<double>(xbins);
const double binwy=fabs(ymax-ymin)/static_cast<double>(ybins);
double y=ymin;
auto itOut=histogramImgOut;
for (size_t by=0; by<ybins; by++) {
double x=xmin;
for (size_t bx=0; bx<xbins; bx++) {
const double vv=jkqtpstatEvaluateKernelSum2D(x,y, firstX, lastX,firstY,lastY, kernel, bandwidthX,bandwidthY);
*itOut=vv;
//std::cout<<x<<","<<y<<","<<vv<<*itOut<<std::endl;
x+=binwx;
++itOut;
}
y+=binwy;
}
}
/*! \brief estimates a bandwidth for a 2-dimensional Kernel Density Estimator (KDE) of the given data \a first ... \a last using Scott's rule
\ingroup jkqtptools_math_statistics_2dkde
evaluates \f[ h = \hat{\sigma} n^{-1/(d+4)},\ \ \ \ \ d=2 \f]
\tparam InputIt standard iterator type of \a first and \a last.
\param first iterator pointing to the first item in the dataset to use \f$ X_1 \f$
\param last iterator pointing behind the last item in the dataset to use \f$ X_N \f$
\return the estimated bandwidth
\see https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation#Rule_of_thumb
*/
template <class InputIt>
inline double jkqtpstatEstimateKDEBandwidth2D(InputIt first, InputIt last) {
size_t N=0;
const double sigma=jkqtpstatStdDev(first, last, nullptr, &N);
return sigma/pow(static_cast<double>(N), 1.0/(2.0+4.0));
}
#endif // JKQTPSTATKDE_H_INCLUDED

View File

@ -0,0 +1,23 @@
/*
Copyright (c) 2008-2019 Jan W. Krieger (<jan@jkrieger.de>)
last modification: $LastChangedDate$ (revision $Rev$)
This software is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (LGPL) as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License (LGPL) for more details.
You should have received a copy of the GNU Lesser General Public License (LGPL)
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "jkqtpstatpoly.h"

View File

@ -0,0 +1,169 @@
/*
Copyright (c) 2008-2019 Jan W. Krieger (<jan@jkrieger.de>)
last modification: $LastChangedDate$ (revision $Rev$)
This software is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (LGPL) as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License (LGPL) for more details.
You should have received a copy of the GNU Lesser General Public License (LGPL)
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef JKQTPSTATPOLY_H_INCLUDED
#define JKQTPSTATPOLY_H_INCLUDED
#include <stdint.h>
#include <cmath>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <stdio.h>
#include <limits>
#include <vector>
#include <utility>
#include <cfloat>
#include <ostream>
#include <iomanip>
#include <sstream>
#include "jkqtcommon/jkqtp_imexport.h"
#include "jkqtcommon/jkqtplinalgtools.h"
#include "jkqtcommon/jkqtparraytools.h"
#include "jkqtcommon/jkqtpdebuggingtools.h"
/*! \brief fits (in a least-squares sense) a polynomial \f$ f(x)=\sum\limits_{i=0}^Pp_ix^i \f$ of order P to a set of N data pairs \f$ (x_i,y_i) \f$
\ingroup jkqtptools_math_statistics_poly
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\tparam OutputItP output iterator for the polynomial coefficients
\param type model to be fitted
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param P degree of the polynomial (P>=N !!!)
\param[out] firstRes Iterator (of type \a OutputItP ), which receives the (P+1)-entry vector with the polynomial coefficients \f$ p_i \f$
This function uses jkqtpstatLinSolve() to solve the system of equations
\f[ \begin{bmatrix} y_1\\ y_2\\ y_3 \\ \vdots \\ y_n \end{bmatrix}= \begin{bmatrix} 1 & x_1 & x_1^2 & \dots & x_1^P \\ 1 & x_2 & x_2^2 & \dots & x_2^P\\ 1 & x_3 & x_3^2 & \dots & x_3^P \\ \vdots & \vdots & \vdots & & \vdots \\ 1 & x_n & x_n^2 & \dots & x_n^P \end{bmatrix} \begin{bmatrix} p_0\\ p_1\\ p_2\\ \vdots \\ p_P \end{bmatrix} \f]
\f[ \vec{y}=V\vec{p}\ \ \ \ \ \Rightarrow\ \ \ \ \ \vec{p}=(V^TV)^{-1}V^T\vec{y} \f]
\image html jkqtplotter_simpletest_datastore_regression_polynom.png
\see https://en.wikipedia.org/wiki/Polynomial_regression
*/
template <class InputItX, class InputItY, class OutputItP>
inline void jkqtpstatPolyFit(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, size_t P, OutputItP firstRes) {
{
const int Nx=std::distance(firstX,lastX);
const int Ny=std::distance(firstY,lastY);
JKQTPASSERT(Nx>1 && Ny>1);
}
size_t N=0;
std::vector<double> X,Y;
auto itX=firstX;
auto itY=firstY;
for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
X.push_back(fit_x);
Y.push_back(fit_y);
N++;
}
}
// build Vandermonde matrix V
std::vector<double> V;
V.resize(N*(P+1));
for (size_t l=0; l<N; l++) {
V[jkqtplinalgMatIndex(l,0,P+1)]=1.0;
double x=X[l];
const double xx=x;
for (size_t c=1; c<P+1; c++) {
V[jkqtplinalgMatIndex(l,c,P+1)]=x;
x=x*xx;
}
}
#ifdef STATISTICS_TOOLS_DEBUG_statisticsPolyFit
std::cout<<"V = \n";
jkqtplinalgPrintMatrix(V.data(),N,P+1);
std::cout<<"\n";
#endif
// calculate V^T
std::vector<double> VT=V;
jkqtplinalgTransposeMatrix(VT.data(), static_cast<long>(N), static_cast<long>(P+1));
#ifdef STATISTICS_TOOLS_DEBUG_statisticsPolyFit
std::cout<<"V^T = \n";
jkqtplinalgPrintMatrix(VT.data(),P+1,N);
std::cout<<"\n";
#endif
// calculate V^T*V
std::vector<double> VTV;
VTV.resize((P+1)*(P+1));
jkqtplinalgMatrixProduct(VT.data(), static_cast<long>(P+1), static_cast<long>(N), V.data(), static_cast<long>(N), static_cast<long>(P+1), VTV.data());
#ifdef STATISTICS_TOOLS_DEBUG_statisticsPolyFit
std::cout<<"V^T*V = \n";
jkqtplinalgPrintMatrix(VTV.data(),P+1,P+1);
std::cout<<"\n";
#endif
// calculate V^T*y
std::vector<double> VTY;
VTY.resize(P+1);
jkqtplinalgMatrixProduct(VT.data(), static_cast<long>(P+1), static_cast<long>(N), Y.data(), static_cast<long>(N), 1, VTY.data());
#ifdef STATISTICS_TOOLS_DEBUG_statisticsPolyFit
std::cout<<"V^T*y = \n";
jkqtplinalgPrintMatrix(VTY.data(),P+1,1);
std::cout<<"\n";
#endif
// solve V^T*y = V^T*V*p
const bool ok=jkqtplinalgLinSolve(VTV.data(), VTY.data(), static_cast<long>(P+1));
if (ok) {
auto itR=firstRes;
for (size_t p=0; p<P+1; p++) {
*++itR=VTY[p];
}
} else {
throw std::runtime_error("jkqtplinalgLinSolve() didn't return a result!");
}
#ifdef STATISTICS_TOOLS_DEBUG_statisticsPolyFit
std::cout<<"result_out = \n";
jkqtplinalgPrintMatrix(result_out,P+1,1);
std::cout<<"\n";
#endif
}
#endif // JKQTPSTATPOLY_H_INCLUDED

View File

@ -0,0 +1,87 @@
/*
Copyright (c) 2008-2019 Jan W. Krieger (<jan@jkrieger.de>)
last modification: $LastChangedDate$ (revision $Rev$)
This software is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (LGPL) as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License (LGPL) for more details.
You should have received a copy of the GNU Lesser General Public License (LGPL)
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "jkqtpstatregression.h"
std::function<double (double, double, double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type) {
switch(type) {
case JKQTPStatRegressionModelType::Linear: return [](double x, double a, double b)->double { return a+b*x; };
case JKQTPStatRegressionModelType::PowerLaw: return [](double x, double a, double b)->double { return a*pow(x,b); };
case JKQTPStatRegressionModelType::Exponential: return [](double x, double a, double b)->double { return a*exp(b*x); };
case JKQTPStatRegressionModelType::Logarithm: return [](double x, double a, double b)->double { return a+b*log(x); };
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateRegressionModel()");
}
QString jkqtpstatRegressionModel2Latex(JKQTPStatRegressionModelType type, double a, double b) {
switch(type) {
case JKQTPStatRegressionModelType::Linear: return QString("f(x)=%1%2{\\cdot}x").arg(jkqtp_floattolatexqstr(a, 2, true, 1e-16,1e-2, 1e4,false)).arg(jkqtp_floattolatexqstr(b, 2, true, 1e-16,1e-2, 1e4,true));
case JKQTPStatRegressionModelType::PowerLaw: return QString("f(x)=%1{\\cdot}x^{%2}").arg(jkqtp_floattolatexqstr(a, 3)).arg(jkqtp_floattolatexqstr(b, 3));
case JKQTPStatRegressionModelType::Exponential: return QString("f(x)=%1{\\cdot}\\exp(%2{\\cdot}x)").arg(jkqtp_floattolatexqstr(a, 3)).arg(jkqtp_floattolatexqstr(b, 3));
case JKQTPStatRegressionModelType::Logarithm: return QString("f(x)=%1%2{\\cdot}\\ln(x)").arg(jkqtp_floattolatexqstr(a, 2, true, 1e-16,1e-2, 1e4,false)).arg(jkqtp_floattolatexqstr(b, 2, true, 1e-16,1e-2, 1e4,true));
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpstatRegressionModel2Latex()");
}
std::function<double (double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type, double a, double b) {
auto res=jkqtpStatGenerateRegressionModel(type);
return std::bind(res, std::placeholders::_1, a, b);
}
std::pair<std::function<double (double)>, std::function<double (double)> > jkqtpStatGenerateTransformation(JKQTPStatRegressionModelType type) {
auto logF=[](double x)->double { return log(x); };
//auto expF=[](double x)->double { return exp(x); };
auto idF=&jkqtp_identity<double>;
switch(type) {
case JKQTPStatRegressionModelType::Linear: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::PowerLaw: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, logF);
case JKQTPStatRegressionModelType::Exponential: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, logF);
case JKQTPStatRegressionModelType::Logarithm: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, idF);
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateTransformation()");
}
std::pair<std::function<double (double)>, std::function<double (double)> > jkqtpStatGenerateParameterATransformation(JKQTPStatRegressionModelType type) {
auto logF=[](double x)->double { return log(x); };
auto expF=[](double x)->double { return exp(x); };
auto idF=&jkqtp_identity<double>;
switch(type) {
case JKQTPStatRegressionModelType::Linear: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::PowerLaw: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, expF);
case JKQTPStatRegressionModelType::Exponential: return std::pair<std::function<double(double)>,std::function<double(double)> >(logF, expF);
case JKQTPStatRegressionModelType::Logarithm: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateParameterATransformation()");
}
std::pair<std::function<double (double)>, std::function<double (double)> > jkqtpStatGenerateParameterBTransformation(JKQTPStatRegressionModelType type) {
//auto logF=[](double x)->double { return log(x); };
//auto expF=[](double x)->double { return exp(x); };
auto idF=&jkqtp_identity<double>;
switch(type) {
case JKQTPStatRegressionModelType::Linear: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::PowerLaw: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::Exponential: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
case JKQTPStatRegressionModelType::Logarithm: return std::pair<std::function<double(double)>,std::function<double(double)> >(idF, idF);
}
throw std::runtime_error("unknown JKQTPStatRegressionModelType in jkqtpStatGenerateParameterBTransformation()");
}

View File

@ -0,0 +1,632 @@
/*
Copyright (c) 2008-2019 Jan W. Krieger (<jan@jkrieger.de>)
last modification: $LastChangedDate$ (revision $Rev$)
This software is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (LGPL) as published by
the Free Software Foundation, either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License (LGPL) for more details.
You should have received a copy of the GNU Lesser General Public License (LGPL)
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef JKQTPSTATREGRESSION_H_INCLUDED
#define JKQTPSTATREGRESSION_H_INCLUDED
#include <stdint.h>
#include <cmath>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <stdio.h>
#include <limits>
#include <vector>
#include <utility>
#include <cfloat>
#include <ostream>
#include <iomanip>
#include <sstream>
#include "jkqtcommon/jkqtp_imexport.h"
#include "jkqtcommon/jkqtplinalgtools.h"
#include "jkqtcommon/jkqtparraytools.h"
#include "jkqtcommon/jkqtpdebuggingtools.h"
#include "jkqtcommon/jkqtpstatbasics.h"
#include "jkqtcommon/jkqtpstatpoly.h"
/*! \brief calculate the linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is \f$ f(x)=a+b\cdot x \f$
So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-(a+b\cdot x_i)\right)^2 \f]
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param[in,out] coeffA returns the offset of the linear model
\param[in,out] coeffB returns the slope of the linear model
\param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
\param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
This function computes internally:
\f[ a=\overline{y}-b\cdot\overline{x} \f]
\f[ b=\frac{\sum x_iy_i-N\cdot\overline{x}\cdot\overline{y}}{\sum x_i^2-N\cdot(\overline{x})^2} \f]
\image html jkqtplotter_simpletest_datastore_regression_lin.png
*/
template <class InputItX, class InputItY>
inline void jkqtpstatLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false) {
if (fixA&&fixB) return;
const int Nx=std::distance(firstX,lastX);
const int Ny=std::distance(firstY,lastY);
JKQTPASSERT(Nx>1 && Ny>1);
double sumx=0, sumy=0, sumxy=0, sumx2=0;
size_t N=0;
auto itX=firstX;
auto itY=firstY;
for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
sumx=sumx+fit_x;
sumy=sumy+fit_y;
sumxy=sumxy+fit_x*fit_y;
sumx2=sumx2+fit_x*fit_x;
N++;
}
}
const double NN=static_cast<double>(N);
JKQTPASSERT_M(NN>1, "too few datapoints");
if (!fixA && !fixB) {
coeffB=(double(sumxy)-double(sumx)*double(sumy)/NN)/(double(sumx2)-double(sumx)*double(sumx)/NN);;
coeffA=double(sumy)/NN-coeffB*double(sumx)/NN;
} else if (fixA && !fixB) {
coeffB=(double(sumy)/NN-coeffA)/(double(sumx)/NN);
} else if (!fixA && fixB) {
coeffA=double(sumy)/NN-coeffB*double(sumx)/NN;
}
}
/*! \brief calculate the weighted linear regression coefficients for a given for a given data range \a firstX / \a firstY / \a firstW ... \a lastX / \a lastY / \a lastW where the model is \f$ f(x)=a+b\cdot x \f$
So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_iw_i^2\cdot\left(y_i-(a+b\cdot x_i)\right)^2 \f]
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\tparam InputItW standard iterator type of \a firstW and \a lastW.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
\param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
\param[in,out] coeffA returns the offset of the linear model
\param[in,out] coeffB returns the slope of the linear model
\param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
\param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
\param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weight, i.e. \c wi=fWeightDataToWi(*itW)
e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y, has a large weight, if it's error is small and in the case if jkqtp_identity() it's weight
is directly proportional to the given value.
This function internally computes:
\f[ a=\frac{\overline{y}-b\cdot\overline{x}}{\overline{w^2}} \f]
\f[ b=\frac{\overline{w^2}\cdot\overline{x\cdot y}-\overline{x}\cdot\overline{y}}{\overline{x^2}\cdot\overline{w^2}-\overline{x}^2} \f]
Here the averages are defined in terms of a weight vector \f$ w_i\f$:
\f[ \overline{x}=\sum\limits_iw_i^2\cdot x_i \f]
\f[ \overline{y}=\sum\limits_iw_i^2\cdot y_i \f]
\f[ \overline{x\cdot y}=\sum\limits_iw_i^2\cdot x_i\cdot y_i \f]
\f[ \overline{x^2}=\sum\limits_iw_i^2\cdot x_i^2 \f]
\f[ \overline{w^2}=\sum\limits_iw_i^2 \f]
\image html jkqtplotter_simpletest_datastore_regression_linweight.png
*/
template <class InputItX, class InputItY, class InputItW>
inline void jkqtpstatLinearWeightedRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
if (fixA&&fixB) return;
const int Nx=std::distance(firstX,lastX);
const int Ny=std::distance(firstY,lastY);
const int Nw=std::distance(firstW,lastW);
JKQTPASSERT(Nx>1 && Ny>1 && Nw>1);
double sumx=0, sumy=0, sumxy=0, sumx2=0, sumw2=0;
size_t N=0;
auto itX=firstX;
auto itY=firstY;
auto itW=firstW;
for (; itX!=lastX && itY!=lastY && itW!=lastW; ++itX, ++itY, ++itW) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
const double fit_w2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*itW)));
if (JKQTPIsOKFloat(fit_x)&&JKQTPIsOKFloat(fit_y)&&JKQTPIsOKFloat(fit_w2)) {
sumx=sumx+fit_w2*fit_x;
sumy=sumy+fit_w2*fit_y;
sumxy=sumxy+fit_w2*fit_x*fit_y;
sumx2=sumx2+fit_w2*fit_x*fit_x;
sumw2=sumw2+fit_w2;
N++;
}
}
const double NN=static_cast<double>(N);
JKQTPASSERT_M(NN>1, "too few datapoints");
if (!fixA && !fixB) {
coeffB=(double(sumxy)*double(sumw2)-double(sumx)*double(sumy))/(double(sumx2)*double(sumw2)-double(sumx)*double(sumx));
coeffA=(double(sumy)-coeffB*double(sumx))/double(sumw2);
} else if (fixA && !fixB) {
coeffB=(double(sumy)-coeffA*double(sumw2))/double(sumx);
} else if (!fixA && fixB) {
coeffA=(double(sumy)-coeffB*double(sumx))/double(sumw2);
}
}
/*! \brief calculate the (robust) iteratively reweighted least-squares (IRLS) estimate for the parameters of the model \f$ f(x)=a+b\cdot x \f$
for a given data range \a firstX / \a firstY ... \a lastX / \a lastY
So this function finds an outlier-robust solution to the optimization problem:
\f[ (a^\ast,b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
\ingroup jkqtptools_math_statistics_regression
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param[in,out] coeffA returns the offset of the linear model
\param[in,out] coeffB returns the slope of the linear model
\param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
\param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
\param p regularization parameter, the optimization problem is formulated in the \f$ L_p \f$ norm, using this \a p (see image below for an example)
\param iterations the number of iterations the IRLS algorithm performs
This is a simple form of the IRLS algorithm to estimate the parameters a and b in a linear model \f$ f(x)=a+b\cdot x \f$.
This algorithm solves the optimization problem for a \f$ L_p\f$-norm:
\f[ (a^\ast,b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^p \f]
by iteratively optimization weights \f$ \vec{w} \f$ and solving a weighted least squares problem in each iteration:
\f[ (a_n,b_n)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i|a+b\cdot x_i-y_i|^{(p-2)}\cdot|a+b\cdot x_i-y_i|^2 \f]
The IRLS-algorithm works as follows:
- calculate initial \f$ a_0\f$ and \f$ b_0\f$ with unweighted regression from x and y
- perform a number of iterations (parameter \a iterations ). In each iteration \f$ n\f$:
- calculate the error vector \f$\vec{e}\f$: \f[ e_i = a+b\cdot x_i -y_i \f]
- estimate new weights \f$\vec{w}\f$: \f[ w_i=|e_i|^{(p-2)/2} \f]
- calculate new estimates \f$ a_n\f$ and \f$ b_n\f$ with weighted regression from \f$ \vec{x}\f$ and \f$ \vec{y}\f$ and \f$ \vec{w}\f$
.
- return the last estimates \f$ a_n\f$ and \f$ b_n\f$
.
\image html irls.png
\image html jkqtplotter_simpletest_datastore_regression_linrobust_p.png
\see https://en.wikipedia.org/wiki/Iteratively_reweighted_least_squares, C. Sidney Burrus: "Iterative Reweighted Least Squares", <a href="http://cnx.org/content/m45285/latest/">http://cnx.org/content/m45285/latest/</a>
*/
template <class InputItX, class InputItY>
inline void jkqtpstatRobustIRLSLinearRegression(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100) {
if (fixA&&fixB) return;
const int Nx=std::distance(firstX,lastX);
const int Ny=std::distance(firstY,lastY);
const int N=std::min(Nx,Ny);
JKQTPASSERT(Nx>1 && Ny>1);
std::vector<double> weights;
std::fill_n(std::back_inserter(weights), N, 1.0);
double alast=coeffA, blast=coeffB;
jkqtpstatLinearWeightedRegression(firstX, lastX, firstY, lastY, weights.begin(), weights.end(), alast, blast, fixA, fixB, &jkqtp_identity<double>);
for (int it=0; it<iterations-1; it++) {
// calculate weights
auto itX=firstX;
auto itY=firstY;
for (double& w: weights) {
const double fit_x=*itX;
const double fit_y=*itY;
const double e=alast+blast*fit_x-fit_y;
w=pow(std::max<double>(JKQTP_EPSILON*100.0, fabs(e)), (p-2.0)/2.0);
++itX;
++itY;
}
// solve weighted linear least squares
jkqtpstatLinearWeightedRegression(firstX, lastX, firstY, lastY, weights.begin(), weights.end(), alast, blast, fixA, fixB, &jkqtp_identity<double>);
}
coeffA=alast;
coeffB=blast;
}
/*! \brief when performing linear regression, different target functions can be fitted, if the input data is transformed accordingly. This library provides the options in this enum by default.
\ingroup jkqtptools_math_statistics_regression
*/
enum class JKQTPStatRegressionModelType {
Linear, /*!< \brief linear model \f$ f(x)=a+b\cdot x \f$ */
PowerLaw, /*!< \brief power law model \f$ f(x)=a\cdot x^b \f$ */
Exponential, /*!< \brief exponential model \f$ f(x)=a\cdot \exp(b\cdot x) \f$ */
Logarithm, /*!< \brief exponential model \f$ f(x)=a+b\cdot \ln(x) \f$ */
};
/*! \brief Generates functors \c f(x,a,b) for the models from JKQTPStatRegressionModelType in \a type
\ingroup jkqtptools_math_statistics_regression
*/
JKQTP_LIB_EXPORT std::function<double(double, double, double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type);
/*! \brief Generates a LaTeX string for the models from JKQTPStatRegressionModelType in \a type
\ingroup jkqtptools_math_statistics_regression
*/
JKQTP_LIB_EXPORT QString jkqtpstatRegressionModel2Latex(JKQTPStatRegressionModelType type, double a, double b);
/*! \brief Generates functors \c f(x) for the models from JKQTPStatRegressionModelType in \a type and binds the parameter values \a and \a b to the returned function
\ingroup jkqtptools_math_statistics_regression
*/
JKQTP_LIB_EXPORT std::function<double(double)> jkqtpStatGenerateRegressionModel(JKQTPStatRegressionModelType type, double a, double b);
/*! \brief Generates the transformation function for x-data (\c result.first ) and y-data (\c result.second ) for each regression model in JKQTPStatRegressionModelType in \a type
\ingroup jkqtptools_math_statistics_regression
\internal
*/
JKQTP_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(double)> > jkqtpStatGenerateTransformation(JKQTPStatRegressionModelType type);
/*! \brief Generates the transformation function for a-parameter (offset, \c result.first : transform, \c result.second : back-transform) for each regression model in JKQTPStatRegressionModelType in \a type
\ingroup jkqtptools_math_statistics_regression
\internal
*/
JKQTP_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(double)> > jkqtpStatGenerateParameterATransformation(JKQTPStatRegressionModelType type);
/*! \brief Generates the transformation function for b-parameter (slope, \c result.first : transform, \c result.second : back-transform) for each regression model in JKQTPStatRegressionModelType in \a type
\ingroup jkqtptools_math_statistics_regression
\internal
*/
JKQTP_LIB_EXPORT std::pair<std::function<double(double)>,std::function<double(double)> > jkqtpStatGenerateParameterBTransformation(JKQTPStatRegressionModelType type);
/*! \brief calculate the linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
So this function solves the least-squares optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^2 \f]
by reducing it to a linear fit by transforming x- and/or y-data
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\param type model to be fitted
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param[in,out] coeffA returns the offset of the linear model
\param[in,out] coeffB returns the slope of the linear model
\param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
\param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
This function computes internally first transforms the data, as appropriate to fit the model defined by \a type and then calls jkqtpstatLinearRegression()
to obtain the parameters. The output parameters are transformed, so they can be used with jkqtpStatGenerateRegressionModel() to generate a functor
that evaluates the model
\see JKQTPStatRegressionModelType, jkqtpStatGenerateRegressionModel(), jkqtpstatLinearRegression(), jkqtpStatGenerateTransformation()
*/
template <class InputItX, class InputItY>
inline void jkqtpstatRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false) {
std::vector<double> x, y;
auto trafo=jkqtpStatGenerateTransformation(type);
auto aTrafo =jkqtpStatGenerateParameterATransformation(type);
auto bTrafo =jkqtpStatGenerateParameterBTransformation(type);
std::transform(firstX, lastX, std::back_inserter(x), trafo.first);
std::transform(firstY, lastY, std::back_inserter(y), trafo.second);
double a=aTrafo.first(coeffA);
double b=bTrafo.first(coeffB);
jkqtpstatLinearRegression(x.begin(), x.end(), y.begin(), y.end(), a, b, fixA, fixB);
coeffA=aTrafo.second(a);
coeffB=bTrafo.second(b);
}
/*! \brief calculate the robust linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
So this function solves the Lp-norm optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^p \f]
by reducing it to a linear fit by transforming x- and/or y-data
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\param type model to be fitted
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param[in,out] coeffA returns the offset of the linear model
\param[in,out] coeffB returns the slope of the linear model
\param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
\param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
\param p regularization parameter, the optimization problem is formulated in the \f$ L_p \f$ norm, using this \a p (see image below for an example)
\param iterations the number of iterations the IRLS algorithm performs
This function computes internally first transforms the data, as appropriate to fit the model defined by \a type and then calls jkqtpstatRobustIRLSLinearRegression()
to obtain the parameters. The output parameters are transformed, so they can be used with jkqtpStatGenerateRegressionModel() to generate a functor
that evaluates the model
\see JKQTPStatRegressionModelType, jkqtpStatGenerateRegressionModel(), jkqtpstatRobustIRLSLinearRegression(), jkqtpStatGenerateTransformation()
*/
template <class InputItX, class InputItY>
inline void jkqtpstatRobustIRLSRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, double p=1.1, int iterations=100) {
std::vector<double> x, y;
auto trafo=jkqtpStatGenerateTransformation(type);
auto aTrafo =jkqtpStatGenerateParameterATransformation(type);
auto bTrafo =jkqtpStatGenerateParameterBTransformation(type);
std::transform(firstX, lastX, std::back_inserter(x), trafo.first);
std::transform(firstY, lastY, std::back_inserter(y), trafo.second);
double a=aTrafo.first(coeffA);
double b=bTrafo.first(coeffB);
jkqtpstatRobustIRLSLinearRegression(x.begin(), x.end(), y.begin(), y.end(), a, b, fixA, fixB, p, iterations);
coeffA=aTrafo.second(a);
coeffB=bTrafo.second(b);
}
/*! \brief calculate the robust linear regression coefficients for a given data range \a firstX / \a firstY ... \a lastX / \a lastY where the model is defined by \a type
So this function solves the Lp-norm optimization problem: \f[ (a^\ast, b^\ast)=\mathop{\mathrm{arg\;min}}\limits_{a,b}\sum\limits_i\left(y_i-f_{\text{type}}(x_i,a,b)\right)^p \f]
by reducing it to a linear fit by transforming x- and/or y-data
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\tparam InputItW standard iterator type of \a firstW and \a lastW.
\param type model to be fitted
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
\param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
\param[in,out] coeffA returns the offset of the linear model
\param[in,out] coeffB returns the slope of the linear model
\param fixA if \c true, the offset coefficient \f$ a \f$ is not determined by the fit, but the value provided in \a coeffA is used
\param fixB if \c true, the slope coefficient \f$ b \f$ is not determined by the fit, but the value provided in \a coeffB is used
\param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weight, i.e. \c wi=fWeightDataToWi(*itW)
e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y, has a large weight, if it's error is small and in the case if jkqtp_identity() it's weight
is directly proportional to the given value.
This function computes internally first transforms the data, as appropriate to fit the model defined by \a type and then calls jkqtpstatLinearWeightedRegression()
to obtain the parameters. The output parameters are transformed, so they can be used with jkqtpStatGenerateRegressionModel() to generate a functor
that evaluates the model
\see JKQTPStatRegressionModelType, jkqtpStatGenerateRegressionModel(), jkqtpstatLinearWeightedRegression(), jkqtpStatGenerateTransformation()
*/
template <class InputItX, class InputItY, class InputItW>
inline void jkqtpstatWeightedRegression(JKQTPStatRegressionModelType type, InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, double& coeffA, double& coeffB, bool fixA=false, bool fixB=false, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
std::vector<double> x, y;
auto trafo=jkqtpStatGenerateTransformation(type);
auto aTrafo =jkqtpStatGenerateParameterATransformation(type);
auto bTrafo =jkqtpStatGenerateParameterBTransformation(type);
std::transform(firstX, lastX, std::back_inserter(x), trafo.first);
std::transform(firstY, lastY, std::back_inserter(y), trafo.second);
double a=aTrafo.first(coeffA);
double b=bTrafo.first(coeffB);
jkqtpstatLinearWeightedRegression(x.begin(), x.end(), y.begin(), y.end(), firstW, lastW, a, b, fixA, fixB, fWeightDataToWi);
coeffA=aTrafo.second(a);
coeffB=bTrafo.second(b);
}
/*! \brief calculates the coefficient of determination \f$ R^2 \f$ for a set of measurements \f$ (x_i,y_i) \f$ with a fit function \f$ f(x) \f$
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param f function \f$ f(x) \f$, result of a fit to the data
\return coeffcicient of determination \f[ R^2=1-\frac{\sum_i\bigl[y_i-f(x_i)\bigr]^2}{\sum_i\bigl[y_i-\overline{y}\bigr]^2} \f] where \f[ \overline{y}=\frac{1}{N}\cdot\sum_iy_i \f]
\see https://en.wikipedia.org/wiki/Coefficient_of_determination
*/
template <class InputItX, class InputItY>
inline double jkqtpstatCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function<double(double)> f) {
auto itX=firstX;
auto itY=firstY;
const double yMean=jkqtpstatAverage(firstX,lastX);
double SSres=0;
double SStot=0;
for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
SStot+=jkqtp_sqr(fit_y-yMean);
SSres+=jkqtp_sqr(fit_y-f(fit_x));
}
}
return 1.0-SSres/SStot;
}
/*! \brief calculates the weightedcoefficient of determination \f$ R^2 \f$ for a set of measurements \f$ (x_i,y_i,w_i) \f$ with a fit function \f$ f(x) \f$
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\tparam InputItW standard iterator type of \a firstW and \a lastW.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
\param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
\param f function \f$ f(x) \f$, result of a fit to the data
\param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weight, i.e. \c wi=fWeightDataToWi(*itW)
e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y, has a large weight, if it's error is small and in the case if jkqtp_identity() it's weight
is directly proportional to the given value.
\return weighted coeffcicient of determination \f[ R^2=1-\frac{\sum_iw_i^2\bigl[y_i-f(x_i)\bigr]^2}{\sum_iw_i^2\bigl[y_i-\overline{y}\bigr]^2} \f] where \f[ \overline{y}=\frac{1}{N}\cdot\sum_iw_iy_i \f]
with \f[ \sum_iw_i=1 \f]
\see https://en.wikipedia.org/wiki/Coefficient_of_determination
*/
template <class InputItX, class InputItY, class InputItW>
inline double jkqtpstatWeightedCoefficientOfDetermination(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function<double(double)> f, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
auto itX=firstX;
auto itY=firstY;
auto itW=firstW;
const double yMean=jkqtpstatWeightedAverage(firstX,lastX,firstW);
double SSres=0;
double SStot=0;
for (; itX!=lastX && itY!=lastY && itW!=lastW; ++itX, ++itY, ++itW) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
const double fit_w2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*itW)));
if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y) && JKQTPIsOKFloat(fit_w2)) {
SSres+=(fit_w2*jkqtp_sqr(fit_y-f(fit_x)));
SStot+=(fit_w2*jkqtp_sqr(fit_y-yMean));
}
}
return 1.0-SSres/SStot;
}
/*! \brief calculates the sum of deviations \f$ \chi^2 \f$ for a set of measurements \f$ (x_i,y_i) \f$ with a fit function \f$ f(x) \f$
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param f function \f$ f(x) \f$, result of a fit to the data
\return sum of deviations \f[ \chi^2=\sum_i\bigl[y_i-f(x_i)\bigr]^2 \f]
\see https://en.wikipedia.org/wiki/Coefficient_of_determination
*/
template <class InputItX, class InputItY>
inline double jkqtpstatSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, std::function<double(double)> f) {
auto itX=firstX;
auto itY=firstY;
double SSres=0;
for (; itX!=lastX && itY!=lastY; ++itX, ++itY) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y)) {
SSres+=jkqtp_sqr(fit_y-f(fit_x));
}
}
return SSres;
}
/*! \brief calculates the weighted sum of deviations \f$ \chi^2 \f$ for a set of measurements \f$ (x_i,y_i,w_i) \f$ with a fit function \f$ f(x) \f$
\ingroup jkqtptools_math_statistics_regression
\tparam InputItX standard iterator type of \a firstX and \a lastX.
\tparam InputItY standard iterator type of \a firstY and \a lastY.
\tparam InputItW standard iterator type of \a firstW and \a lastW.
\param firstX iterator pointing to the first item in the x-dataset to use \f$ x_1 \f$
\param lastX iterator pointing behind the last item in the x-dataset to use \f$ x_N \f$
\param firstY iterator pointing to the first item in the y-dataset to use \f$ y_1 \f$
\param lastY iterator pointing behind the last item in the y-dataset to use \f$ y_N \f$
\param firstW iterator pointing to the first item in the weight-dataset to use \f$ w_1 \f$
\param lastW iterator pointing behind the last item in the weight-dataset to use \f$ w_N \f$
\param f function \f$ f(x) \f$, result of a fit to the data
\param fWeightDataToWi an optional function, which is applied to the data from \a firstW ... \a lastW to convert them to weight, i.e. \c wi=fWeightDataToWi(*itW)
e.g. if you use data used to draw error bars, you can use jkqtp_inversePropSaveDefault(). The default is jkqtp_identity(), which just returns the values.
In the case of jkqtp_inversePropSaveDefault(), a datapoint x,y, has a large weight, if it's error is small and in the case if jkqtp_identity() it's weight
is directly proportional to the given value.
\return weighted sum of deviations \f[ \chi^2=\sum_iw_i^2\cdot\bigl[y_i-f(x_i)\bigr]^2 \f]
\see https://en.wikipedia.org/wiki/Reduced_chi-squared_statistic
*/
template <class InputItX, class InputItY, class InputItW>
inline double jkqtpstatWeightedSumOfDeviations(InputItX firstX, InputItX lastX, InputItY firstY, InputItY lastY, InputItW firstW, InputItW lastW, std::function<double(double)> f, std::function<double(double)> fWeightDataToWi=&jkqtp_identity<double>) {
auto itX=firstX;
auto itY=firstY;
auto itW=firstW;
double SSres=0;
for (; itX!=lastX && itY!=lastY && itW!=lastW; ++itX, ++itY, ++itW) {
const double fit_x=jkqtp_todouble(*itX);
const double fit_y=jkqtp_todouble(*itY);
const double fit_w2=jkqtp_sqr(fWeightDataToWi(jkqtp_todouble(*itW)));
if (JKQTPIsOKFloat(fit_x) && JKQTPIsOKFloat(fit_y) && JKQTPIsOKFloat(fit_w2)) {
SSres+=fit_w2*jkqtp_sqr(fit_y-f(fit_x));
}
}
return SSres;
}
#endif // JKQTPSTATREGRESSION_H_INCLUDED

View File

@ -30,7 +30,12 @@ isEmpty(JKQTP_COMMON_PRI_INCLUDED) {
$$PWD/jkqtcommon/jkqtpmathparser.h \
$$PWD/jkqtcommon/jkqttools.h \
$$PWD/jkqtcommon/jkqtparraytools.h \
$$PWD/jkqtcommon/jkqtpstatisticstools.h
$$PWD/jkqtcommon/jkqtpstatisticstools.h \
$$PWD/jkqtcommon/jkqtpstatbasics.h \
$$PWD/jkqtcommon/jkqtpstathistogram.h \
$$PWD/jkqtcommon/jkqtpstatkde.h \
$$PWD/jkqtcommon/jkqtpstatregression.h \
$$PWD/jkqtcommon/jkqtpstatpoly.h
SOURCES += $$PWD/jkqtcommon/jkqtpdebuggingtools.cpp \
@ -46,7 +51,12 @@ isEmpty(JKQTP_COMMON_PRI_INCLUDED) {
$$PWD/jkqtcommon/jkqtpmathparser.cpp \
$$PWD/jkqtcommon/jkqttools.cpp \
$$PWD/jkqtcommon/jkqtparraytools.cpp \
$$PWD/jkqtcommon/jkqtpstatisticstools.cpp
$$PWD/jkqtcommon/jkqtpstatisticstools.cpp \
$$PWD/jkqtcommon/jkqtpstatbasics.cpp \
$$PWD/jkqtcommon/jkqtpstathistogram.cpp \
$$PWD/jkqtcommon/jkqtpstatkde.cpp \
$$PWD/jkqtcommon/jkqtpstatregression.cpp \
$$PWD/jkqtcommon/jkqtpstatpoly.cpp
INCLUDEPATH += $$PWD