Analysis of Variance models
In [1]: import statsmodels.api as sm
In [2]: from statsmodels.formula.api import ols
In [3]: moore = sm.datasets.get_rdataset("Moore", "car",
...: cache=True) # load data
...:
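Note: the traceback below is an artifact of building this page without network access; it is not the expected output of the example. get_rdataset fetches the dataset over the network, and the build host could not resolve the remote host name (see the final URLError). With a working network connection the call in In [3] completes without raising.
---------------------------------------------------------------------------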
URLError Traceback (most recent call last)
<ipython-input-3-6d0888eba6f2> in <module>()
1 moore = sm.datasets.get_rdataset("Moore", "car",
----> 2 cache=True) # load data
/build/statsmodels-0.8.0~rc1+git43-g1ac3f11/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/datasets/utils.pyc in get_rdataset(dataname, package, cache)
287 "master/doc/"+package+"/rst/")
288 cache = _get_cache(cache)
--> 289 data, from_cache = _get_data(data_base_url, dataname, cache)
290 data = read_csv(data, index_col=0)
291 data = _maybe_reset_index(data)
/build/statsmodels-0.8.0~rc1+git43-g1ac3f11/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/datasets/utils.pyc in _get_data(base_url, dataname, cache, extension)
218 url = base_url + (dataname + ".%s") % extension
219 try:
--> 220 data, from_cache = _urlopen_cached(url, cache)
221 except HTTPError as err:
222 if '404' in str(err):
/build/statsmodels-0.8.0~rc1+git43-g1ac3f11/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/datasets/utils.pyc in _urlopen_cached(url, cache)
209 # not using the cache or didn't find it in cache
210 if not from_cache:
--> 211 data = urlopen(url).read()
212 if cache is not None: # then put it in the cache
213 _cache_it(data, cache_path)
/usr/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout)
125 if _opener is None:
126 _opener = build_opener()
--> 127 return _opener.open(url, data, timeout)
128
129 def install_opener(opener):
/usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
402 req = meth(req)
403
--> 404 response = self._open(req, data)
405
406 # post-process response
/usr/lib/python2.7/urllib2.pyc in _open(self, req, data)
420 protocol = req.get_type()
421 result = self._call_chain(self.handle_open, protocol, protocol +
--> 422 '_open', req)
423 if result:
424 return result
/usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
380 func = getattr(handler, meth_name)
381
--> 382 result = func(*args)
383 if result is not None:
384 return result
/usr/lib/python2.7/urllib2.pyc in https_open(self, req)
1220
1221 def https_open(self, req):
-> 1222 return self.do_open(httplib.HTTPSConnection, req)
1223
1224 https_request = AbstractHTTPHandler.do_request_
/usr/lib/python2.7/urllib2.pyc in do_open(self, http_class, req)
1182 except socket.error, err: # XXX what error?
1183 h.close()
-> 1184 raise URLError(err)
1185 else:
1186 try:
URLError: <urlopen error [Errno -2] Name or service not known>
In [4]: data = moore.data
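Note: this NameError and the ones that follow are consequences of the failed download in In [3]. Because moore was never assigned, data, moore_lm and table are undefined as well.
---------------------------------------------------------------------------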
NameError Traceback (most recent call last)
<ipython-input-4-07b15baaa84d> in <module>()
----> 1 data = moore.data
NameError: name 'moore' is not defined
In [5]: data = data.rename(columns={"partner.status":
...: "partner_status"}) # make name pythonic
...:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-5-5d7d914cce96> in <module>()
----> 1 data = data.rename(columns={"partner.status":
2 "partner_status"}) # make name pythonic
3
NameError: name 'data' is not defined
In [6]: moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
...: data=data).fit()
...:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-6-0dd290a7e946> in <module>()
1 moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
----> 2 data=data).fit()
NameError: name 'data' is not defined
In [7]: table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 ANOVA DataFrame
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-7-4e32df33effd> in <module>()
----> 1 table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 ANOVA DataFrame
NameError: name 'moore_lm' is not defined
In [8]: print(table)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-8-d1f157ed9e2f> in <module>()
----> 1 print(table)
NameError: name 'table' is not defined
A more detailed example can be found here: