Demo of Statistical Data Analysis using Python:

In [12]:
# Python3 code to show Box-cox Transformation 
# of non-normal data 
%matplotlib notebook
# import modules 
import numpy as np 
from scipy import stats 

# plotting modules 
'''
import seaborn as sns 
import matplotlib.pyplot as plt 

import scipy
from matplotlib import gridspec
import matplotlib.ticker as ticker
from scipy import optimize
from matplotlib.ticker import AutoMinorLocator
from scipy.optimize import curve_fit
import statsmodels.api as sm
from lmfit.models import LorentzianModel
'''
Out[12]:
'\nimport seaborn as sns \nimport matplotlib.pyplot as plt \n\nimport scipy\nfrom matplotlib import gridspec\nimport matplotlib.ticker as ticker\nfrom scipy import optimize\nfrom matplotlib.ticker import AutoMinorLocator\nfrom scipy.optimize import curve_fit\nimport statsmodels.api as sm\nfrom lmfit.models import LorentzianModel\n'
In [11]:
# generate non-normal data (exponential) 
original_data = np.random.exponential(size = 1000) 

# transform training data & save lambda value 
fitted_data, fitted_lambda = stats.boxcox(original_data) 

# creating axes to draw plots 
fig, ax = plt.subplots(1, 2) 

# plotting the original data(non-normal) and 
# fitted data (normal) 
sns.distplot(original_data, hist = False, kde = True, kde_kws = {'shade': True, 'linewidth': 2}, label = "Non-Normal", color ="green", ax = ax[0]) 

sns.distplot(fitted_data, hist = False, kde = True, kde_kws = {'shade': True, 'linewidth': 2}, label = "Normal", color ="green", ax = ax[1]) 

# adding legends to the subplots 
plt.legend(loc = "upper right") 

# rescaling the subplots 
fig.set_figheight(5) 
fig.set_figwidth(10) 

print(f"Lambda value used for Transformation: {fitted_lambda}") 
Lambda value used for Transformation: 0.29557219226167036
In [15]:
x = fitted_data
y = x*x
# plotting 
plt.title("Line graph")  
plt.xlabel("X axis")  
plt.ylabel("Y axis")  
plt.plot(x, y, color ="red")  
plt.show()
In [ ]: