Logo

Comparing OLS and RLMΒΆ

In [1]: import numpy as np

from scipy import stats

In [2]: import statsmodels.api as sm

In [3]: import matplotlib.pyplot as plt

In [4]: from statsmodels.sandbox.regression.predstd import wls_prediction_std

In [5]: nsample = 50

In [6]: x1 = np.linspace(0, 20, nsample)

In [7]: X = np.c_[x1, (x1-5)**2, np.ones(nsample)]

In [8]: sig = 0.3   # smaller error variance makes OLS<->RLM contrast bigger

In [9]: beta = [0.5, -0.0, 5.]

In [10]: y_true2 = np.dot(X, beta)

In [11]: y2 = y_true2 + sig*1. * np.random.normal(size=nsample)

In [12]: y2[[39,41,43,45,48]] -= 5   # add some outliers (10% of nsample)

Example: estimate quadratic function (true is linear)

In [13]: plt.figure()
Out[13]: <matplotlib.figure.Figure at 0x5811210>

In [14]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[14]: 
[<matplotlib.lines.Line2D at 0x4be7910>,
 <matplotlib.lines.Line2D at 0x6c69810>]

In [15]: res = sm.OLS(y2, X).fit()

In [16]: print res.params
[ 0.52041769 -0.01343655  5.10209124]

Note: quadratic term captures outlier effect

In [17]: print res.bse
[ 0.07137208  0.00631533  0.46229484]

print res.predict

plt.plot(x1, res.fittedvalues, 'r--')

In [18]: prstd, iv_l, iv_u = wls_prediction_std(res)

In [19]: plt.plot(x1, res.fittedvalues, 'r-')
Out[19]: [<matplotlib.lines.Line2D at 0x6c69d10>]

In [20]: plt.plot(x1, iv_u, 'r--')
Out[20]: [<matplotlib.lines.Line2D at 0x6c69d50>]

In [21]: plt.plot(x1, iv_l, 'r--')
Out[21]: [<matplotlib.lines.Line2D at 0x62cb510>]

compare with robust estimator

In [22]: resrlm = sm.RLM(y2, X).fit()

In [23]: print resrlm.params
[ 0.50899669 -0.00311252  5.02369257]

In [24]: print resrlm.bse
[ 0.0194497   0.001721    0.12598057]

In [25]: plt.plot(x1, resrlm.fittedvalues, 'g.-')
Out[25]: [<matplotlib.lines.Line2D at 0x6444850>]

In [26]: plt.title('blue: true,   red: OLS,   green: RLM')
Out[26]: <matplotlib.text.Text at 0x57e6e50>

Example: estimate linear function (true is linear)

In [27]: X2 = X[:,[0,2]]  # use only linear term and constant

In [28]: plt.figure()
Out[28]: <matplotlib.figure.Figure at 0x6444d90>

In [29]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[29]: 
[<matplotlib.lines.Line2D at 0x57eca90>,
 <matplotlib.lines.Line2D at 0x57ec090>]

In [30]: res2 = sm.OLS(y2, X2).fit()

In [31]: print res2.params
[ 0.38605223  5.64366631]

Note: without a quadratic term, the outliers directly bias the OLS slope and intercept (compare the estimates above with the true values 0.5 and 5)

In [32]: print res2.bse
[ 0.03445102  0.3998306 ]

print res2.predict

In [33]: prstd, iv_l, iv_u = wls_prediction_std(res2)

In [34]: plt.plot(x1, res2.fittedvalues, 'r-')
Out[34]: [<matplotlib.lines.Line2D at 0x57ece10>]

In [35]: plt.plot(x1, iv_u, 'r--')
Out[35]: [<matplotlib.lines.Line2D at 0x57ec350>]

In [36]: plt.plot(x1, iv_l, 'r--')
Out[36]: [<matplotlib.lines.Line2D at 0x64447d0>]

compare with robust estimator

In [37]: resrlm2 = sm.RLM(y2, X2).fit()

In [38]: print resrlm2.params
[ 0.48344129  5.12227932]

In [39]: print resrlm2.bse
[ 0.0094077   0.10918363]

In [40]: plt.plot(x1, resrlm2.fittedvalues, 'g.-')
Out[40]: [<matplotlib.lines.Line2D at 0x57d7090>]

In [41]: plt.title('blue: true,   red: OLS,   green: RLM')
Out[41]: <matplotlib.text.Text at 0x5bda750>

see also help(sm.RLM.fit) for more options and module sm.robust.scale for scale options

plt.show()

This Page