In [1]: import numpy as np
   ...: from scipy import stats
In [2]: import statsmodels.api as sm
In [3]: import matplotlib.pyplot as plt
In [4]: from statsmodels.sandbox.regression.predstd import wls_prediction_std
In [5]: nsample = 50
In [6]: x1 = np.linspace(0, 20, nsample)
In [7]: X = np.c_[x1, (x1-5)**2, np.ones(nsample)]
In [8]: sig = 0.3 # smaller error variance makes OLS<->RLM contrast bigger
In [9]: beta = [0.5, -0.0, 5.]
In [10]: y_true2 = np.dot(X, beta)
In [11]: y2 = y_true2 + sig*1. * np.random.normal(size=nsample)
In [12]: y2[[39,41,43,45,48]] -= 5 # add some outliers (10% of nsample)
Example: estimate quadratic function (true is linear)
In [13]: plt.figure()
Out[13]: <matplotlib.figure.Figure at 0x5811210>
In [14]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[14]:
[<matplotlib.lines.Line2D at 0x4be7910>,
<matplotlib.lines.Line2D at 0x6c69810>]
In [15]: res = sm.OLS(y2, X).fit()
In [16]: print(res.params)
[ 0.52041769 -0.01343655 5.10209124]
Note: the quadratic term (true coefficient is zero) partly captures the outlier effect
In [17]: print(res.bse)
[ 0.07137208 0.00631533 0.46229484]
In [18]: prstd, iv_l, iv_u = wls_prediction_std(res)
In [19]: plt.plot(x1, res.fittedvalues, 'r-')
Out[19]: [<matplotlib.lines.Line2D at 0x6c69d10>]
In [20]: plt.plot(x1, iv_u, 'r--')
Out[20]: [<matplotlib.lines.Line2D at 0x6c69d50>]
In [21]: plt.plot(x1, iv_l, 'r--')
Out[21]: [<matplotlib.lines.Line2D at 0x62cb510>]
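As an aside, newer statsmodels releases can produce the same prediction intervals without the sandbox helper; a minimal sketch, assuming a version where the results object exposes get_prediction() (the obs_ci_* columns play the role of iv_l and iv_u above):

# Sketch (assumes a recent statsmodels): prediction intervals from the results object
pred_frame = res.get_prediction(X).summary_frame(alpha=0.05)
plt.plot(x1, pred_frame['obs_ci_lower'], 'r:')
plt.plot(x1, pred_frame['obs_ci_upper'], 'r:')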
Compare with the robust estimator:
In [22]: resrlm = sm.RLM(y2, X).fit()
In [23]: print(resrlm.params)
[ 0.50899669 -0.00311252 5.02369257]
In [24]: print(resrlm.bse)
[ 0.0194497 0.001721 0.12598057]
In [25]: plt.plot(x1, resrlm.fittedvalues, 'g.-')
Out[25]: [<matplotlib.lines.Line2D at 0x6444850>]
In [26]: plt.title('blue: true, red: OLS, green: RLM')
Out[26]: <matplotlib.text.Text at 0x57e6e50>
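RLM reaches the better estimates by downweighting the outlying observations during iteratively reweighted least squares; the final weights are available on the results object, so the flagged points can be inspected directly (a small sketch using the weights attribute):

# Observations with weights well below 1 were treated as outliers;
# the five shifted points (indices 39, 41, 43, 45, 48) should be among them.
print(resrlm.weights)
print(np.argsort(resrlm.weights)[:5])  # indices of the most heavily downweighted points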
Example: estimate linear function (true is linear)
In [27]: X2 = X[:,[0,2]] # use only linear term and constant
In [28]: plt.figure()
Out[28]: <matplotlib.figure.Figure at 0x6444d90>
In [29]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[29]:
[<matplotlib.lines.Line2D at 0x57eca90>,
<matplotlib.lines.Line2D at 0x57ec090>]
In [30]: res2 = sm.OLS(y2, X2).fit()
In [31]: print(res2.params)
[ 0.38605223 5.64366631]
Note: without a quadratic term to absorb them, the outliers pull the OLS slope and intercept away from the true values (quantified just after this example)
In [32]: print(res2.bse)
[ 0.03445102 0.3998306 ]
In [33]: prstd, iv_l, iv_u = wls_prediction_std(res2)
In [34]: plt.plot(x1, res2.fittedvalues, 'r-')
Out[34]: [<matplotlib.lines.Line2D at 0x57ece10>]
In [35]: plt.plot(x1, iv_u, 'r--')
Out[35]: [<matplotlib.lines.Line2D at 0x57ec350>]
In [36]: plt.plot(x1, iv_l, 'r--')
Out[36]: [<matplotlib.lines.Line2D at 0x64447d0>]
Compare with the robust estimator:
In [37]: resrlm2 = sm.RLM(y2, X2).fit()
In [38]: print(resrlm2.params)
[ 0.48344129 5.12227932]
In [39]: print(resrlm2.bse)
[ 0.0094077 0.10918363]
In [40]: plt.plot(x1, resrlm2.fittedvalues, 'g.-')
Out[40]: [<matplotlib.lines.Line2D at 0x57d7090>]
In [41]: plt.title('blue: true, red: OLS, green: RLM')
Out[41]: <matplotlib.text.Text at 0x5bda750>
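To quantify the note above, the bias of each fit can be computed against the true coefficients of the linear model (a sketch reusing beta from the data generation; entries 0 and 2 of beta correspond to the columns of X2):

# True slope and intercept of the data-generating process
beta2_true = np.array([beta[0], beta[2]])
print(res2.params - beta2_true)     # OLS estimates pulled away by the outliers
print(resrlm2.params - beta2_true)  # RLM estimates stay much closer to the truth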
See also help(sm.RLM.fit) for more fitting options and the sm.robust.scale module for robust scale estimators.
plt.show()
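For instance, a different M-estimator norm and a robust scale estimate can be passed in; a hedged sketch of those options (HuberT is the default norm; TukeyBiweight and HuberScale are other choices provided by sm.robust.norms and sm.robust.scale):

# Sketch: refit the linear model with Tukey's biweight norm and a Huber scale estimate
resrlm3 = sm.RLM(y2, X2, M=sm.robust.norms.TukeyBiweight()).fit(scale_est=sm.robust.scale.HuberScale())
print(resrlm3.params)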