Pages

Monday, July 7, 2025

c++ doodle statistics summaries

This is a simple example I wrote of computing some basic math/statistics summaries in C++.  It uses a template class so that it can work on vectors of various numeric types.  There may be bugs, so please use/modify at your own risk.

Happy Sketching!


#include <cmath> //for pow, sqrt
#include <cstddef> //for std::size_t
#include <cstdio> //for printf
#include <iostream>
#include <stdexcept> //for throwing errors
#include <vector>


//Simple statistic summaries (mean, variance, sd, covariance, correlation)
//for a std::vector of a numeric type T. ex: floats, doubles, int
//
//Every result is returned as T, so with an integral T the arithmetic is
//integer arithmetic and results are truncated.
//Every method throws std::runtime_error when the input has too few elements.
template<typename T>
class NumericVectorSummary
{
private:
	//Throws std::runtime_error unless data holds at least numElements values.
	void assertAtLeastNumElements(const std::vector<T>& data, int numElements) const
	{
		//a non-positive requirement is always met; testing it first also keeps
		//the cast below from wrapping a negative value into a huge count
		if(numElements > 0 && data.size() < static_cast<std::size_t>(numElements))
		{
			throw std::runtime_error("Expecting more elements.");
		}
	}

	//Throws std::runtime_error unless x and y hold the same number of values.
	void assertNumElementsEqual(const std::vector<T>& x, const std::vector<T>& y) const
	{
		if(x.size() != y.size())
		{
			throw std::runtime_error("Expecting same number of elements.");
		}
	}

public:
	//Arithmetic mean: sum(xi) / n
	//Throws std::runtime_error if data is empty.
	T mean(const std::vector<T>& data) const
	{
		assertAtLeastNumElements(data, 1);

		T sum = 0;
		for(const T& value : data)
		{
			sum += value;
		}
		return sum / static_cast<T>(data.size());
	}

	//theoretical: E((X-E(X))^2)
	//sample: sum (xi-xbar)^2 / (n-1)
	//Throws std::runtime_error if data has fewer than 2 elements, because the
	//(n-1) denominator would otherwise be zero.
	T variance(const std::vector<T>& data) const
	{
		assertAtLeastNumElements(data, 2);

		const T sampleMean = mean(data);
		T numerator = 0;
		for(const T& value : data)
		{
			const T deviation = value - sampleMean;
			numerator += deviation * deviation;
		}

		return numerator / static_cast<T>(data.size() - 1);
	}

	//Sample standard deviation: sqrt(variance(data))
	//Throws std::runtime_error if data has fewer than 2 elements.
	T sd(const std::vector<T>& data) const
	{
		//variance() performs the element-count check
		return static_cast<T>(std::sqrt(variance(data)));
	}

	//theoretical: E( (X-E(X))(Y-E(Y)) )
	//sample: sum( (xi-xbar)(yi-ybar) ) / (n-1)
	//Throws std::runtime_error if either input has fewer than 2 elements or
	//if the inputs differ in length.
	T covariance(const std::vector<T>& x, const std::vector<T>& y) const
	{
		assertAtLeastNumElements(x, 2);
		assertAtLeastNumElements(y, 2);
		assertNumElementsEqual(x, y);

		const T xmean = mean(x);
		const T ymean = mean(y);
		T numerator = 0;
		for(std::size_t i = 0; i < x.size(); ++i)
		{
			numerator += (x[i] - xmean) * (y[i] - ymean);
		}

		return numerator / static_cast<T>(x.size() - 1);
	}

	//theoretical: E( (X-E(X))(Y-E(Y)) ) / ( sqrt(Var(X)) * sqrt(Var(Y)) )
	//sample: covariance(x, y) / ( sd(x) * sd(y) )
	//Throws std::runtime_error under the same conditions as covariance().
	//NOTE: when either input has zero spread the denominator is zero; no check
	//is made for that here, so the division is performed as-is.
	T cor(const std::vector<T>& x, const std::vector<T>& y) const
	{
		const T cov = covariance(x, y);
		const T xsd = sd(x);
		const T ysd = sd(y);
		return cov / (xsd * ysd);
	}

};


void test_mean()
{
	printf("test_mean\n");
	NumericVectorSummary<float> summary;
	std::vector<float> data = {1, 2, 3};
	printf("%f\n", summary.mean(data));
	printf("expected: %f\n", 2.0);

	NumericVectorSummary<double> summaryD;
	std::vector<double> dataD = {3, 4, 5};
	printf("%f\n", summaryD.mean(dataD));
	printf("expected: %f\n", 4.0);
}

void test_variance()
{
	printf("test_variance\n");
	NumericVectorSummary<float> summary;
	std::vector<float> data = {1, 2, 3};
	printf("%f\n", summary.variance(data));
	printf("expected: %f\n", 1.0);

	printf("test_sd\n");
	NumericVectorSummary<double> summaryD;
	std::vector<double> dataD = {3, 4, 5};
	printf("%f\n", summaryD.sd(dataD));
	printf("expected: %f\n", 1.0);	
}

void test_covariance()
{
	printf("test_covariance\n");
	NumericVectorSummary<float> summary;
	std::vector<float> x = {1, 2, 3};
	std::vector<float> y = {3, 5, 7};
	printf("%f\n", summary.covariance(x, y));
	printf("expected: %f\n", 2.0);

	/*in r
	x = c(1,2,3)
	y = c(3,5,7)
	cov(x,y)
	*/	

	printf("test_correlation\n");
	printf("%f\n", summary.cor(x, y));
	printf("expected: %f\n", 1.0);

	/*in r
	x = c(1,2,3)
	y = c(3,5,7)
	cor(x,y)	
	*/
}

//Checks that covariance() rejects inputs of different lengths.
//covariance() throws std::runtime_error for mismatched lengths, so the
//expectation is printed before the call and the exception is caught here;
//otherwise the message would never be shown and the uncaught exception
//would terminate the program.
void test_correlation_element_length()
{
	printf("test_correlation_element_length\n");
	NumericVectorSummary<float> summary;
	std::vector<float> x = {1, 2, 3};
	std::vector<float> y = {3, 5};

	printf("expected runtime error\n");
	try
	{
		printf("%f\n", summary.covariance(x, y));
		//only reached if covariance() accepted the mismatched inputs
		printf("no error was thrown\n");
	}
	catch(const std::runtime_error& e)
	{
		printf("caught runtime error: %s\n", e.what());
	}
}

//Entry point: prints a banner, then runs the demo functions in order.
int main()
{
	printf("yaay\n");

	//test_correlation_element_length is kept out of this list (disabled)
	void (*const demos[])() = {test_mean, test_variance, test_covariance};
	for(void (*demo)() : demos)
	{
		demo();
	}

	return 0;
}

/*
g++ -o prog data_simple_stats.cpp -std=c++11
./prog
*/