In [1]: import numpy as np
In [2]: import statsmodels.api as sm
Create some data
In [3]: nsample = 50
In [4]: sig = 0.25
In [5]: x1 = np.linspace(0, 20, nsample)
In [6]: X = np.c_[x1, np.sin(x1), (x1-5)**2, np.ones(nsample)]
In [7]: beta = [0.5, 0.5, -0.02, 5.]
In [8]: y_true = np.dot(X, beta)
In [9]: y = y_true + sig * np.random.normal(size=nsample)
Setup and estimate the model
In [10]: olsmod = sm.OLS(y, X)
In [11]: olsres = olsmod.fit()
In [12]: print olsres.params
[ 0.49256098 0.4686888 -0.01884563 5.0396749 ]
In [13]: print olsres.bse
[ 0.01559678 0.06131279 0.00136941 0.10113014]
In-sample prediction
In [14]: ypred = olsres.predict(X)
Create a new sample of explanatory variables Xnew, predict and plot
In [15]: x1n = np.linspace(20.5,25, 10)
In [16]: Xnew = np.c_[x1n, np.sin(x1n), (x1n-5)**2, np.ones(10)]
In [17]: ynewpred = olsres.predict(Xnew) # predict out of sample
In [18]: print ypred
[ 4.56853404 5.02939457 5.45341099 5.81504017 6.09795738
6.29773836 6.42258623 6.49198375 6.53349236 6.57822388
6.65572898 6.78914203 6.99138024 7.26302136 7.59220879
7.95659954 8.32703518 8.67233245 8.96440657 9.18288554
9.31845713 9.37439892 9.36603978 9.31824135 9.2613134
9.22603396 9.23859264 9.31628767 9.46468394 9.67669865
9.93376499 10.20888265 10.47105386 10.69037626 10.84295575
10.91483226 10.9042733 10.82205864 10.68970922 10.53595131
10.39199686 10.2864155 10.24044057 10.2644806 10.35640919
10.50191438 10.67685006 10.85120311 10.99402445 11.07851394]
In [19]: import matplotlib.pyplot as plt
In [20]: plt.figure()
Out[20]: <matplotlib.figure.Figure at 0x6d22290>
In [21]: plt.plot(x1, y, 'o', x1, y_true, 'b-')
Out[21]:
[<matplotlib.lines.Line2D at 0x6632890>,
<matplotlib.lines.Line2D at 0x6628ad0>]
In [22]: plt.plot(np.hstack((x1, x1n)), np.hstack((ypred, ynewpred)),'r')
Out[22]: [<matplotlib.lines.Line2D at 0x6628fd0>]
In [23]: plt.title('OLS prediction, blue: true and data, fitted/predicted values:red')
Out[23]: <matplotlib.text.Text at 0x63b1fd0>