# coding: utf-8

# In[38]:


# The notebook uses `pd` and `np` throughout but never imported them, so a
# fresh kernel failed with NameError on this very first cell.
import numpy as np
import pandas as pd

# Indicator columns removed before modelling.
# NOTE(review): presumably these are the reference categories (one map, one
# hero per side) plus columns that would otherwise be perfectly collinear in
# the regression below -- confirm against how the matrix was built.
DROPPED_COLUMNS = [
    'map_Cursed Hollow',
    'allied_char_Gall',
    'opposing_char_Gall',
    'opposing_char_Zeratul',
    'allied_char_Zeratul',
]

# Load the observation matrix and discard the columns listed above.
obs = pd.read_csv("obs_matrix_newer_dataset.csv").drop(columns=DROPPED_COLUMNS)


# In[39]:


obs.head()


# In[40]:


obs.shape


# In[41]:


from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools import add_constant


# In[43]:


def demean(s):
    """Return `s` shifted so that its mean is zero.

    Parameters
    ----------
    s : object supporting ``.mean()`` and subtraction (e.g. a pandas Series)

    Returns
    -------
    The result of subtracting ``s.mean()`` from every element of `s`.
    """
    centre = s.mean()
    return s - centre


# Continuous covariates that are centred and then given a quadratic term.
# The order of this list fixes the order in which the squared columns are
# appended to `obs`.
continuous_cols = [
    'allied_avg_mrr',
    'allied_avg_hero_level',
    'opp_avg_hero_level',
    'opp_avg_mrr',
]

# Centre each covariate in place (overwrites the original column).
for col in continuous_cols:
    obs[col] = demean(obs[col])

# Append the square of each centred covariate as a new '<name>**2' column.
for col in continuous_cols:
    obs[col + '**2'] = obs[col] ** 2

# Logistic regression of `outcome` on every other column plus an intercept.
lg = Logit(obs['outcome'], add_constant(obs.drop('outcome', axis=1)))


# In[44]:


# Fit the logistic regression built above.
results = lg.fit()


# In[45]:


# Full regression table.
results.summary()


# In[46]:


# Keep only the fitted coefficients whose label contains 'allied_char',
# i.e. one coefficient per allied-hero indicator column.
x = results.params.loc[
    lambda coefs: coefs.index.str.contains('allied_char', regex=False)
]


# In[57]:


# For every allied-hero indicator column, collect the outcomes of the rows
# where that indicator is set. Selecting once per hero avoids recomputing the
# same boolean mask for the mean, the spread and the count.
outcomes_by_hero = {
    c: obs.loc[obs[c].astype(bool), 'outcome'] for c in x.index
}

# Naive win rate: raw mean outcome over the rows where the hero is present.
y = pd.Series({c: played.mean() for c, played in outcomes_by_hero.items()})

# Standard error of that mean. `Series.sem()` is the sample standard
# deviation divided by sqrt(n); it replaces the hand-written
# `std() / np.sqrt(n)`, which depended on `np` even though the notebook
# never imported numpy.
y_std = pd.Series({c: played.sem() for c, played in outcomes_by_hero.items()})


# In[58]:


y_std


# In[48]:


get_ipython().run_line_magic('matplotlib', 'inline')
from matplotlib import pyplot as plt

# Request high-DPI inline figures through the inline backend's config; the
# `IPython.display.set_matplotlib_formats` helper used before is deprecated.
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina'")
plt.style.use('bmh')

HERO_PREFIX = 'allied_char_'

# Fixed viewing window; points outside it are not shown.
x_lo, x_hi = -0.5, 0.5
y_lo, y_hi = 0.38, 0.6

# Scatter of the fitted Logit coefficient (x) against the raw mean outcome
# (y) for each allied-hero indicator column.
f, ax = plt.subplots(1, 1, figsize=(12, 12))
ax.scatter(x, y)

for i, col in enumerate(x.index):
    # Remove the literal prefix. `str.lstrip("allied_char_")` strips any
    # leading characters from that *set*, which would also eat the start of a
    # hero name beginning with one of those characters.
    label = col[len(HERO_PREFIX):] if col.startswith(HERO_PREFIX) else col
    # `.iloc` makes the positional lookup explicit; plain `x[i]` on a
    # label-indexed Series relies on a deprecated fallback.
    ax.annotate(label, (x.iloc[i] + 0.005, y.iloc[i]), fontsize=9)

# Reference lines at a 50% naive win rate and at a zero coefficient.
ax.hlines(0.5, x_lo, x_hi, lw=1, color='gray')
ax.vlines(0.0, y_lo, y_hi, lw=1, color='gray')

ax.set_xlim(x_lo, x_hi)
ax.set_ylim(y_lo, y_hi)

ax.text(-0.025, 0.59, 'higher naive win rate -->', style='italic', rotation=90, fontsize=13)
ax.text(-0.025, 0.44, '<-- lower naive win rate', style='italic', rotation=90, fontsize=13)
ax.text(0.22, 0.503, 'higher causal win rate -->', style='italic', fontsize=13)
ax.text(-0.465, 0.503, '<-- lower causal win rate', style='italic', fontsize=13)

# NOTE(review): the x values are Logit coefficients, not rates; the label
# text is kept as the author wrote it.
ax.set_ylabel("Naive win rate")
ax.set_xlabel("Causal win rate")

f.savefig("causal_v_naive.png", bbox_inches='tight')


# In[49]:


# Persist both per-hero measures side by side: the raw mean outcome ('naive')
# and the fitted Logit coefficient ('causal').
hero_comparison = pd.DataFrame({'naive': y, 'causal': x})
hero_comparison.to_csv("20180710_results.csv")


# In[37]:


# Horizontal bar chart of the allied-hero coefficients, sorted by value, with
# error bars of 1.96 standard errors on each side.
f, ax = plt.subplots(1, 1, figsize=(15, 12))

# Standard errors of the same coefficients that `x` holds.
yerr = results.bse.filter(like='allied_char')

r = pd.DataFrame({'param': x, 'error': 1.96 * yerr})

# Show bare hero names on the axis. The prefix is removed by slicing because
# `str.lstrip('allied_char_')` strips a *set* of characters and could also
# remove the leading letters of a hero's name.
hero_prefix = 'allied_char_'
r.index = [
    name[len(hero_prefix):] if name.startswith(hero_prefix) else name
    for name in r.index
]

r.sort_values('param').plot.barh(y='param', ax=ax, color='#348ABD', xerr='error', lw=0.1)
f.savefig("coefs.png", bbox_inches='tight')


# In[22]:


# Label of the coefficient sitting at the middle position once the hero
# coefficients are sorted in ascending order.
x.sort_values().index[len(x) // 2]