# coding: utf-8
"""Notebook export: logistic-regression win model vs. naive per-character win rate.

Reads an observation matrix from CSV, fits a statsmodels ``Logit`` of
``outcome`` on every other column, and compares the fitted
``allied_char_*`` coefficients with the raw mean outcome of the rows in
which each of those columns is set.  Writes two figures and one CSV.
"""

import numpy as np
import pandas as pd
from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools import add_constant

# Prefix carried by the per-character indicator columns used for plot labels.
CHAR_PREFIX = 'allied_char_'

# Continuous covariates that are centred and then given a squared term.
CONTINUOUS_COLS = [
    'allied_avg_mrr',
    'allied_avg_hero_level',
    'opp_avg_hero_level',
    'opp_avg_mrr',
]


def _strip_prefix(label, prefix):
    """Return *label* without a leading *prefix*; unchanged if it is absent.

    ``str.lstrip(prefix)`` is not a substitute: it removes any leading
    characters that occur in *prefix*, so it can also eat the start of the
    name that follows the prefix.
    """
    if label.startswith(prefix):
        return label[len(prefix):]
    return label


def demean(s):
    """Return *s* with its own mean subtracted."""
    return s - s.mean()


# In[38]:

obs = pd.read_csv("obs_matrix_newer_dataset.csv")
obs = obs.drop(
    [
        'map_Cursed Hollow',
        'allied_char_Gall',
        'opposing_char_Gall',
        'opposing_char_Zeratul',
        'allied_char_Zeratul',
    ],
    axis=1,
)


# In[39]:

obs.head()


# In[40]:

obs.shape


# In[43]:

# Centre every continuous covariate first, then add the squared terms, so
# the squares are computed from the centred values.
for col in CONTINUOUS_COLS:
    obs[col] = demean(obs[col])
for col in CONTINUOUS_COLS:
    obs[col + '**2'] = obs[col] ** 2

lg = Logit(obs['outcome'], add_constant(obs.drop('outcome', axis=1)))


# In[44]:

results = lg.fit()


# In[45]:

results.summary()


# In[46]:

# Fitted coefficients whose label contains 'allied_char'.
x = results.params.filter(like='allied_char')


# In[57]:

# Naive win rate: mean outcome over the rows where the indicator is set,
# and the standard error of that mean (std / sqrt(number of such rows)).
y = pd.Series({
    c: obs.loc[obs[c].astype(bool)]['outcome'].mean()
    for c in x.index
})
y_std = pd.Series({
    c: obs.loc[obs[c].astype(bool)]['outcome'].std()
    / np.sqrt(obs[c].astype(bool).sum())
    for c in x.index
})


# In[58]:

y_std


# In[48]:

# Plotting imports stay after the magic, in the order the notebook ran them.
get_ipython().run_line_magic('matplotlib', 'inline')
from matplotlib import pyplot as plt
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')
plt.style.use('bmh')

f, ax = plt.subplots(1, 1, figsize=(12, 12))
ax.scatter(x, y)
# y was built by iterating x.index, so pairing the two by position is the
# same pairing the scatter plot uses; going through the raw values avoids
# integer-key lookups on string-labelled Series.
for label, coef, naive in zip(x.index, x.to_numpy(), y.to_numpy()):
    ax.annotate(
        _strip_prefix(label, CHAR_PREFIX),
        (coef + 0.005, naive),
        fontsize=9,
    )

# Reference lines at a 0.5 win rate and a zero coefficient.
plt.hlines(0.5, -0.5, 0.5, lw=1, color='gray')
plt.vlines(0.0, 0.38, 0.6, lw=1, color='gray')
plt.xlim(-0.5, 0.5)
plt.ylim(0.38, 0.6)
ax.text(-0.025, 0.59, 'higher naive win rate -->', style='italic', rotation=90, fontsize=13)
ax.text(-0.025, 0.44, '<-- lower naive win rate', style='italic', rotation=90, fontsize=13)
ax.text(0.22, 0.503, 'higher causal win rate -->', style='italic', fontsize=13)
ax.text(-0.465, 0.503, '<-- lower causal win rate', style='italic', fontsize=13)
plt.ylabel("Naive win rate")
plt.xlabel("Causal win rate")
plt.savefig("causal_v_naive.png", bbox_inches='tight')


# In[49]:

pd.DataFrame({'naive': y, 'causal': x}).to_csv("20180710_results.csv")


# In[37]:

f, ax = plt.subplots(1, 1, figsize=(15, 12))
yerr = results.bse.filter(like='allied_char')
# Error bars are 1.96 standard errors of each coefficient.
r = pd.DataFrame({'param': x, 'error': 1.96 * yerr})
r.index = [_strip_prefix(name, CHAR_PREFIX) for name in r.index]
r.sort_values('param').plot.barh(y='param', ax=ax, color='#348ABD', xerr='error', lw=0.1)
plt.savefig("coefs.png", bbox_inches='tight')


# In[22]:

# Label of the coefficient at the middle position of the sorted coefficients.
x.sort_values().index[len(x) // 2]