// This is a simple example I wrote of computing some basic math/statistics summaries in C++. It uses a class template so that it can work on vectors of various numeric data types. There may be bugs, so please use/modify at your own risk.
// Happy Sketching!
#include <cmath> // math functions such as sqrt and pow
#include <cstdio> // printf
#include <iostream>
#include <stdexcept> // std::runtime_error for reporting bad input
#include <vector>
// Simple statistic summaries for numeric std::vector data (e.g. float, double, int).
// Every result is returned as T, so integer instantiations truncate
// (integer division in mean/variance/covariance, truncated square root in sd).
template<typename T>
class NumericVectorSummary
{
private:
    typedef typename std::vector<T>::size_type size_type;

    // Throws std::runtime_error when data holds fewer than numElements values.
    void assertAtLeastNumElements(const std::vector<T>& data, size_type numElements) const
    {
        if(data.size() < numElements)
        {
            throw std::runtime_error("Expecting more elements.");
        }
    }

    // Throws std::runtime_error when x and y differ in length.
    void assertNumElementsEqual(const std::vector<T>& x, const std::vector<T>& y) const
    {
        if(x.size() != y.size())
        {
            throw std::runtime_error("Expecting same number of elements.");
        }
    }

public:
    // Arithmetic mean: sum(x_i) / n.
    // Throws std::runtime_error if data is empty.
    T mean(const std::vector<T>& data) const
    {
        assertAtLeastNumElements(data, 1);
        T sum = 0;
        for(const T& value : data)
        {
            sum += value;
        }
        // Cast the count to T so a negative integer sum is never converted to unsigned.
        return sum / static_cast<T>(data.size());
    }

    // Sample variance.
    // theoretical: E( (X-E(X))^2 )
    // sample:      sum( (x_i-xbar)^2 ) / (n-1)
    // Throws std::runtime_error if data has fewer than 2 elements,
    // because the (n-1) denominator would otherwise be zero.
    T variance(const std::vector<T>& data) const
    {
        assertAtLeastNumElements(data, 2);
        const T sampleMean = mean(data);
        T numerator = 0;
        for(const T& value : data)
        {
            const T deviation = value - sampleMean;
            numerator += deviation * deviation;
        }
        return numerator / static_cast<T>(data.size() - 1);
    }

    // Sample standard deviation: sqrt(variance(data)).
    // Throws std::runtime_error if data has fewer than 2 elements.
    T sd(const std::vector<T>& data) const
    {
        return static_cast<T>(std::sqrt(variance(data)));
    }

    // Sample covariance.
    // theoretical: E( (X-E(X))(Y-E(Y)) )
    // sample:      sum( (x_i-xbar)(y_i-ybar) ) / (n-1)
    // Throws std::runtime_error if either input has fewer than 2 elements
    // or if the inputs differ in length.
    T covariance(const std::vector<T>& x, const std::vector<T>& y) const
    {
        assertAtLeastNumElements(x, 2);
        assertAtLeastNumElements(y, 2);
        assertNumElementsEqual(x, y);

        const T xmean = mean(x);
        const T ymean = mean(y);
        T numerator = 0;
        for(size_type i = 0; i < x.size(); ++i)
        {
            numerator += (x[i] - xmean) * (y[i] - ymean);
        }
        return numerator / static_cast<T>(x.size() - 1);
    }

    // Sample correlation.
    // theoretical: E( (X-E(X))(Y-E(Y)) ) / ( sqrt(Var(X)) * sqrt(Var(Y)) )
    // Throws std::runtime_error under the same conditions as covariance().
    // NOTE: when either input has zero standard deviation the denominator is
    // zero; that case is not guarded here.
    T cor(const std::vector<T>& x, const std::vector<T>& y) const
    {
        const T cov = covariance(x, y);
        const T xsd = sd(x);
        const T ysd = sd(y);
        return cov / (xsd * ysd);
    }
};
void test_mean()
{
printf("test_mean\n");
NumericVectorSummary<float> summary;
std::vector<float> data = {1, 2, 3};
printf("%f\n", summary.mean(data));
printf("expected: %f\n", 2.0);
NumericVectorSummary<double> summaryD;
std::vector<double> dataD = {3, 4, 5};
printf("%f\n", summaryD.mean(dataD));
printf("expected: %f\n", 4.0);
}
void test_variance()
{
printf("test_variance\n");
NumericVectorSummary<float> summary;
std::vector<float> data = {1, 2, 3};
printf("%f\n", summary.variance(data));
printf("expected: %f\n", 1.0);
printf("test_sd\n");
NumericVectorSummary<double> summaryD;
std::vector<double> dataD = {3, 4, 5};
printf("%f\n", summaryD.sd(dataD));
printf("expected: %f\n", 1.0);
}
void test_covariance()
{
printf("test_covariance\n");
NumericVectorSummary<float> summary;
std::vector<float> x = {1, 2, 3};
std::vector<float> y = {3, 5, 7};
printf("%f\n", summary.covariance(x, y));
printf("expected: %f\n", 2.0);
/*in r
x = c(1,2,3)
y = c(3,5,7)
cov(x,y)
*/
printf("test_correlation\n");
printf("%f\n", summary.cor(x, y));
printf("expected: %f\n", 1.0);
/*in r
x = c(1,2,3)
y = c(3,5,7)
cor(x,y)
*/
}
// Calls covariance() with inputs of different lengths (3 vs 2 elements),
// which is expected to raise std::runtime_error.
// The expectation is printed before the call, because nothing after a
// throwing call would run. The exception is caught and reported so that
// this check does not terminate the program.
void test_correlation_element_length()
{
    NumericVectorSummary<float> summary;
    std::vector<float> x = {1, 2, 3};
    std::vector<float> y = {3, 5};
    printf("expected runtime error\n");
    try
    {
        // Only reached past this line if no exception was thrown.
        printf("%f\n", summary.covariance(x, y));
    }
    catch(const std::runtime_error& error)
    {
        printf("caught runtime error: %s\n", error.what());
    }
}
// Entry point: prints a banner, then runs each enabled demo in order.
int main()
{
    printf("yaay\n");

    // The mismatched-length demo is left disabled:
    //test_correlation_element_length();
    void (*const demos[])() = {test_mean, test_variance, test_covariance};
    for(auto demo : demos)
    {
        demo();
    }
    return 0;
}
/*
g++ -o prog data_simple_stats.cpp -std=c++11
./prog
*/