Wednesday, March 6, 2024

Data visualization doodle: Python and R

#intro python plotting

#scatter plot
"""
import numpy
from matplotlib import pyplot

# Simulate 100 points scattered around the line y = 3x + 5:
# x is drawn around 30, and y gets unit-scale normal noise added.
x = numpy.random.normal(loc=30, scale=1, size=100)
y = 3*x + 5 + numpy.random.normal(loc=0, scale=1, size=100)

# Set the font size BEFORE drawing anything. Matplotlib resolves font sizes
# when the title/label text is created, so updating rcParams afterwards
# leaves text that already exists at the old size.
pyplot.rcParams.update({'font.size':18}) #increasing font size

pyplot.scatter(x,y, color="grey")
pyplot.title("a scatter plot")
pyplot.xlabel("x label")
pyplot.ylabel("y label")
pyplot.show()

#inspired by
#https://stackoverflow.com/questions/12236566/setting-different-color-for-each-series-in-scatter-plot
#https://matplotlib.org/stable/gallery/shapes_and_collections/scatter.html#sphx-glr-gallery-shapes-and-collections-scatter-py
#https://stackoverflow.com/questions/3899980/how-to-change-the-font-size-on-a-matplotlib-plot
"""


"""#adding a line to scatterplot doodle
import numpy
from matplotlib import pyplot

# Simulate 100 points scattered around the line y = 3x + 5:
# x is drawn around 30, and y gets unit-scale normal noise added.
x = numpy.random.normal(loc=30, scale=1, size=100)
y = 3*x + 5 + numpy.random.normal(loc=0, scale=1, size=100)

# Set the font size BEFORE drawing anything. Matplotlib resolves font sizes
# when the title/label text is created, so updating rcParams afterwards
# leaves text that already exists at the old size.
pyplot.rcParams.update({'font.size':18}) #increasing font size

pyplot.scatter(x,y, color="grey")
# Overlay the noise-free line y = 3x + 5. x is unsorted, but every point
# lies on the same straight line, so the result still looks like one segment.
pyplot.plot(x, 3*x+5, color="black") #add a line to plot
pyplot.title("a scatter plot")
pyplot.xlabel("x label")
pyplot.ylabel("y label")
pyplot.show()

#inspired by
#https://stackoverflow.com/questions/45936630/how-to-put-line-plot-and-scatter-plot-on-the-same-plot-in
"""


"""#linear regression intro in python doodle
import numpy
from scipy import stats

# Simulate noisy observations of the line y = 3x + 5, then recover the
# line's parameters with an ordinary least-squares fit.
sample_count = 1000
x = numpy.random.normal(loc=30, scale=1, size=sample_count)
noise = numpy.random.normal(loc=0, scale=1, size=sample_count)
y = 3*x + 5 + noise

# linregress returns a result whose first three fields are the slope,
# the intercept and the correlation coefficient r; read them by name.
result = stats.linregress(x,y)
slope = result.slope
intercept = result.intercept
print("slope {0} intercept {1}".format(slope, intercept))

#example output from one run:
#slope 3.00593151709 intercept 4.86012707682

# Squaring r gives R^2, a measure of fit (1 means perfectly linear).
rsq = result.rvalue**2
print("rsquared {0}".format(rsq))
#example output from one run:
#rsquared 0.8982827724


#inspired by
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html
"""



""" #histogram
import numpy
from matplotlib import pyplot

# Histogram of 1000 draws from a normal distribution centred on 21
# with a standard deviation of 1.
samples = numpy.random.normal(loc=21, scale=1, size=1000)

# Same figure as the pyplot one-liners, built through an explicit axes object.
figure, axes = pyplot.subplots()
axes.hist(samples, color="grey")
axes.set_title("a Histogram")
axes.set_xlabel("doodles")
axes.set_ylabel("frequency")
pyplot.show()


#inspired by
#https://stackoverflow.com/questions/6352740/matplotlib-label-each-bin
#https://matplotlib.org/stable/gallery/statistics/hist.html#sphx-glr-gallery-statistics-hist-py
#https://stackoverflow.com/questions/2130913/no-plot-window-in-matplotlib
"""
#data visualization R doodle

######
#histogram
?hist #open the documentation page for hist()
#draw 1000 random numbers from a normal distribution
#(most land around the mean of 21, with fewer extreme highs and lows)
samples <- rnorm(n=1000, mean=21, sd=1)
hist(samples, border="white", col="grey", main="A Histogram", xlab="doodles")

#######


######
#scatter plot
#simulate 100 points scattered around the line y = 3x + 5
n_points <- 100
x <- rnorm(n_points, mean=30, sd=1) #random normal x values centred on 30
noise <- rnorm(n_points, mean=0, sd=1) #random normal noise centred on 0
y <- 3*x + 5 + noise

plot(x, y, type="p", main="a scatterplot", xlab="x label", ylab="y label")
points(x, y, pch=21, col="grey", bg="grey") #redraw the points as solid filled grey circles
lines(x, 3*x+5) #overlay the noise-free line on the plot
#######
 

 python


r



Happy Sketching!