Monday, July 7, 2025

c++ doodle statistics summaries

This is a simple example I wrote of computing some basic math/statistics summaries in C++. It uses a template class so that it can work on vectors of various numeric types. There may be bugs, so please use/modify at your own risk.

Happy Sketching!


#include <cmath> //for pow, sqrt
#include <cstddef> //for std::size_t
#include <cstdio> //for printf
#include <iostream>
#include <stdexcept> //for throwing errors
#include <vector>


//responsible for simple statistic summaries (mean, variance, standard deviation,
//covariance, correlation) for numeric std vectors. ex: floats, doubles, int
//
//every result is returned as T, so an integral T truncates (ex: the mean of
//{1, 2} as int is 1). use float/double when fractional results matter.
//every method throws std::runtime_error when it is given too few elements.
template<typename T>
class NumericVectorSummary
{
private:
	//throws std::runtime_error unless data holds at least numElements values
	void assertAtLeastNumElements(const std::vector<T>& data, std::size_t numElements) const
	{
		if(data.size() < numElements)
		{
			throw std::runtime_error("Expecting more elements.");
		}
	}

	//throws std::runtime_error unless x and y hold the same number of values
	void assertNumElementsEqual(const std::vector<T>& x, const std::vector<T>& y) const
	{
		if(x.size() != y.size())
		{
			throw std::runtime_error("Expecting same number of elements.");
		}
	}

public:
	//sample mean: sum(xi) / n
	//throws std::runtime_error when data is empty
	T mean(const std::vector<T>& data) const
	{
		assertAtLeastNumElements(data, 1);

		T sum = 0;
		for(const T& value : data)
		{
			sum += value;
		}
		//convert the count to T first so a negative sum is never divided by an unsigned value
		return sum/static_cast<T>(data.size());
	}

	//theoretical: E((X-E(X))^2)
	//sample: sum (xi-xbar)^2 / (n-1)
	//throws std::runtime_error when data has fewer than 2 elements
	T variance(const std::vector<T>& data) const
	{
		//the n-1 denominator is zero for a single element, so require n >= 2
		assertAtLeastNumElements(data, 2);

		const T sampleMean = mean(data);
		T numerator = 0;
		for(const T& value : data)
		{
			const T deviation = value - sampleMean;
			numerator += deviation*deviation;
		}

		return numerator/static_cast<T>(data.size() - 1);
	}

	//sample standard deviation: sqrt(variance(data))
	//throws std::runtime_error when data has fewer than 2 elements
	T sd(const std::vector<T>& data) const
	{
		return static_cast<T>(std::sqrt(variance(data)));
	}

	//theoretical: E( (X-E(X))(Y-E(Y)) )
	//sample: sum( (xi-xbar)(yi-ybar) )/(n-1)
	//throws std::runtime_error when either input has fewer than 2 elements
	//or when the inputs differ in length
	T covariance(const std::vector<T>& x, const std::vector<T>& y) const
	{
		assertAtLeastNumElements(x, 2);
		assertAtLeastNumElements(y, 2);
		assertNumElementsEqual(x, y);

		const T xmean = mean(x);
		const T ymean = mean(y);
		T numerator = 0;
		for(std::size_t i = 0; i < x.size(); ++i)
		{
			numerator += (x[i]-xmean)*(y[i]-ymean);
		}

		return numerator/static_cast<T>(x.size() - 1);
	}

	//theoretical: E( (X-E(X))(Y-E(Y)) ) / ( sqrt(Var(X))*sqrt(Var(Y)) )
	//sample: covariance(x, y) / (sd(x)*sd(y))
	//throws the same errors as covariance().
	//note: when either input is constant its sd is 0 and this divides by zero
	//(NaN/inf for floating point T, undefined behavior for integral T).
	T cor(const std::vector<T>& x, const std::vector<T>& y) const
	{
		const T cov = covariance(x, y);
		const T xsd = sd(x);
		const T ysd = sd(y);
		return cov/(xsd*ysd);
	}

};


void test_mean()
{
	printf("test_mean\n");
	NumericVectorSummary<float> summary;
	std::vector<float> data = {1, 2, 3};
	printf("%f\n", summary.mean(data));
	printf("expected: %f\n", 2.0);

	NumericVectorSummary<double> summaryD;
	std::vector<double> dataD = {3, 4, 5};
	printf("%f\n", summaryD.mean(dataD));
	printf("expected: %f\n", 4.0);
}

void test_variance()
{
	printf("test_variance\n");
	NumericVectorSummary<float> summary;
	std::vector<float> data = {1, 2, 3};
	printf("%f\n", summary.variance(data));
	printf("expected: %f\n", 1.0);

	printf("test_sd\n");
	NumericVectorSummary<double> summaryD;
	std::vector<double> dataD = {3, 4, 5};
	printf("%f\n", summaryD.sd(dataD));
	printf("expected: %f\n", 1.0);	
}

void test_covariance()
{
	printf("test_covariance\n");
	NumericVectorSummary<float> summary;
	std::vector<float> x = {1, 2, 3};
	std::vector<float> y = {3, 5, 7};
	printf("%f\n", summary.covariance(x, y));
	printf("expected: %f\n", 2.0);

	/*in r
	x = c(1,2,3)
	y = c(3,5,7)
	cov(x,y)
	*/	

	printf("test_correlation\n");
	printf("%f\n", summary.cor(x, y));
	printf("expected: %f\n", 1.0);

	/*in r
	x = c(1,2,3)
	y = c(3,5,7)
	cor(x,y)	
	*/
}

//checks that covariance() (which cor() calls first) rejects inputs whose
//lengths differ. the std::runtime_error is caught here and reported, so the
//expectation is always printed and the remaining tests can still run.
void test_correlation_element_length()
{
	printf("test_correlation_element_length\n");
	NumericVectorSummary<float> summary;
	std::vector<float> x = {1, 2, 3};
	std::vector<float> y = {3, 5};

	//print the expectation before the call: the call is supposed to throw
	printf("expected runtime error\n");
	try
	{
		printf("%f\n", summary.covariance(x, y));
		printf("no runtime error was thrown\n");
	}
	catch(const std::runtime_error& error)
	{
		printf("got runtime error: %s\n", error.what());
	}
}

//runs every enabled check in order and prints their output
int main()
{
	printf("yaay\n");

	//test_correlation_element_length is left out on purpose (disabled)
	void (*const checks[])() = {test_mean, test_variance, test_covariance};
	for(auto check : checks)
	{
		check();
	}

	return 0;
}

/*
g++ -o prog data_simple_stats.cpp -std=c++11
./prog
*/

Saturday, May 31, 2025

Sketches faces

 








Happy Sketching!


Thursday, May 29, 2025

doodle python class observer design pattern with plotting data example

This is still a work in progress, but the idea was to use the object-oriented observer design pattern in an example. Here an AnalysisFactory object tells the Data objects when they should plot themselves. There may be bugs, so please modify/use at your own risk.

this was the result:


import numpy as np
from matplotlib import pyplot

class BaseData(object):
    """Base data node: remembers where its data lives and/or the data itself.

    Subclasses override doPlot to draw themselves; this base version only prints.
    """

    def __init__(self, dataPath='', data=None):
        """
        Args:
            dataPath (str) where to find data. supports a path to example database, csv, json
            data optional actual data for the object, like for NUMPYData, PYLISTData subclasses
        """
        self._dataPath, self._data = dataPath, data

    def doPlot(self, msg, ax=None):
        """Print the plot message and the raw data. ax is accepted but not used here.

        Args:
            msg (str) message
            ax (matplotlib.axes.AxesSubplot) pyplot subplot object to use for adding plot
        """
        announcement = "doing plot with message '{}'".format(msg)
        print(announcement)
        print("using raw data", self._data)

    def toNUMPYMatrix(self, msg):
        """Print a conversion notice containing msg and return an empty list (placeholder).

        Args:
            msg (str) message
        """
        notice = "converting data to numpy matrix. assumes all numbers data {}".format(msg)
        print(notice)
        return []

class CSVData(BaseData):
    """Placeholder data node for csv sources; inherits everything from BaseData unchanged."""

class SQLITEData(BaseData):
    """Placeholder data node for sqlite sources; inherits everything from BaseData unchanged."""

class NUMPYData(BaseData):
    """Data node holding in-memory data (the example passes a numpy array) that plots itself."""

    def __init__(self, data):
        """
        Args:
            data the values to plot; stored by BaseData, no data path is set
        """
        super(NUMPYData, self).__init__(data=data)

    def doPlot(self, msg, ax):
        """Print the message and raw data, then draw the data on ax.

        Args:
            msg (str) message
            ax pyplot subplot object that receives the plot (required here)
        """
        className = type(self).__name__
        print("class {0} doing plot with message '{1}'".format(className, msg))
        print("using raw data", self._data)
        # the caller owns the figure and decides when to show it
        ax.plot(self._data)
        
class JSONData(BaseData):
    """Placeholder data node for json sources; inherits everything from BaseData unchanged."""

class PYLISTData(BaseData):
    """Data node holding in-memory data (the example passes a python list) that plots itself."""

    def __init__(self, data):
        """
        Args:
            data the values to plot; stored by BaseData, no data path is set
        """
        super(PYLISTData, self).__init__(data=data)

    def doPlot(self, msg, ax):
        """Print the message and raw data, then draw the data on ax.

        Args:
            msg (str) message
            ax pyplot subplot object that receives the plot (required here)
        """
        className = type(self).__name__
        print("class {0} doing plot with message '{1}'".format(className, msg))
        print("using raw data", self._data)
        # the caller owns the figure and decides when to show it
        ax.plot(self._data)
        
class AnalysisFactory(object):
    """Holds registered data nodes and tells each of them when to act.

    Data nodes are any objects offering doPlot(msg, ax) and toNUMPYMatrix(msg).
    """

    def __init__(self):
        self._data = []  # registered data nodes, in the order they were added

    def addData(self, dataNode):
        """Register dataNode so later doPlots/toNUMPYMatrix calls include it."""
        self._data.append(dataNode)

    def removeData(self, dataNode):
        """Unregister dataNode. list.remove raises ValueError if it was never added."""
        self._data.remove(dataNode)

    def doPlots(self, msg):
        """Ask every registered node to plot itself, side by side in one figure.

        Does nothing when no node is registered.

        Args:
            msg (str) message handed to every node's doPlot
        """
        numFigures = len(self._data)
        if not numFigures:
            return

        # make figure to hold all plots. squeeze=False always returns a 2-d
        # array of axes, so a single plot needs no special case
        fig, axes = pyplot.subplots(nrows=1, ncols=numFigures, squeeze=False)
        for data, ax in zip(self._data, axes[0]):
            data.doPlot(msg, ax)

        # show completed figure
        pyplot.show()

    def toNUMPYMatrix(self, msg):
        """Collect toNUMPYMatrix(msg) from every registered node.

        Args:
            msg (str) message handed to every node's toNUMPYMatrix

        Returns:
            list with one entry per registered node, in registration order
        """
        return [data.toNUMPYMatrix(msg) for data in self._data]

# subject that will notify the data nodes
analysis = AnalysisFactory()

# two observers: one backed by a python list, one by a 2x3 numpy array
listData = PYLISTData(data=[1,2,3])
analysis.addData(listData)

npData = NUMPYData(data=np.array([[1,2,3],[4,5,6]]))
analysis.addData(npData)

# every registered node plots itself into a shared figure
analysis.doPlots("sending make plot update")

#class PYLISTData doing plot with message 'sending make plot update'
#('using raw data', [1, 2, 3])
#class NUMPYData doing plot with message 'sending make plot update'
#('using raw data', array([[1, 2, 3],
#       [4, 5, 6]])) 

Thanks for looking