{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import matplotlib.cm as cm\n", "import sklearn\n", "from sklearn.cross_validation import train_test_split\n", "from sklearn.metrics import roc_auc_score\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "train = pd.read_csv('data/features.csv')\n", "test = pd.read_csv('data/features_test.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "train, validation = train_test_split(train, train_size=0.9)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
match_idstart_timelobby_typer1_heror1_levelr1_xpr1_goldr1_lhr1_killsr1_deaths...dire_boots_countdire_ward_observer_countdire_ward_sentry_countdire_first_ward_timedurationradiant_wintower_status_radianttower_status_direbarracks_status_radiantbarracks_status_dire
167101968814377412481395216020912021...330-33.03243119201536480
295643475814426122647673106710711201...23114.02679001972063
417714913014448441271464195119281320...33110.019900192619745163
501625898614466645511464176812991700...321-24.020441203615426335
438105152214452672981103108317412410...320-37.01927119830630
\n", "

5 rows × 109 columns

\n", "
" ], "text/plain": [ " match_id start_time lobby_type r1_hero r1_level r1_xp r1_gold \\\n", "16710 19688 1437741248 1 39 5 2160 2091 \n", "29564 34758 1442612264 7 67 3 1067 1071 \n", "41771 49130 1444844127 1 46 4 1951 1928 \n", "50162 58986 1446664551 1 46 4 1768 1299 \n", "43810 51522 1445267298 1 10 3 1083 1741 \n", "\n", " r1_lh r1_kills r1_deaths ... dire_boots_count \\\n", "16710 20 2 1 ... 3 \n", "29564 12 0 1 ... 2 \n", "41771 13 2 0 ... 3 \n", "50162 17 0 0 ... 3 \n", "43810 24 1 0 ... 3 \n", "\n", " dire_ward_observer_count dire_ward_sentry_count dire_first_ward_time \\\n", "16710 3 0 -33.0 \n", "29564 3 1 14.0 \n", "41771 3 1 10.0 \n", "50162 2 1 -24.0 \n", "43810 2 0 -37.0 \n", "\n", " duration radiant_win tower_status_radiant tower_status_dire \\\n", "16710 3243 1 1920 1536 \n", "29564 2679 0 0 1972 \n", "41771 1990 0 1926 1974 \n", "50162 2044 1 2036 1542 \n", "43810 1927 1 1983 0 \n", "\n", " barracks_status_radiant barracks_status_dire \n", "16710 48 0 \n", "29564 0 63 \n", "41771 51 63 \n", "50162 63 35 \n", "43810 63 0 \n", "\n", "[5 rows x 109 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "radiant_win 1.000000\n", "barracks_status_radiant 0.747533\n", "tower_status_radiant 0.729062\n", "radiant_boots_count 0.119573\n", "first_blood_player2 0.114069\n", "r2_gold 0.099887\n", "r5_gold 0.094885\n", "r1_gold 0.094100\n", "r4_gold 0.092888\n", "dire_tpscroll_count 0.088654\n", "r3_gold 0.088604\n", "d1_deaths 0.085848\n", "d5_deaths 0.085722\n", "r1_kills 0.083411\n", "d2_deaths 0.080585\n", "d3_deaths 0.080545\n", "d4_deaths 0.080394\n", "r2_kills 0.077740\n", "r3_kills 0.076147\n", "r4_kills 0.074169\n", "r5_kills 0.073717\n", "dire_bottle_time 0.065992\n", "r2_lh 0.064364\n", "r2_xp 0.061239\n", "r5_xp 0.061105\n", "r5_lh 0.059644\n", "r4_xp 0.059320\n", "r4_lh 0.058043\n", "r5_items 0.054113\n", "r1_lh 0.053880\n", " ... \n", "d1_xp -0.053137\n", "d4_lh -0.053810\n", "d1_lh -0.056920\n", "d5_lh -0.059155\n", "radiant_bottle_time -0.061444\n", "d2_xp -0.061448\n", "d5_xp -0.063288\n", "d2_lh -0.063424\n", "d4_kills -0.074616\n", "d2_kills -0.075567\n", "r1_deaths -0.078361\n", "d1_kills -0.078879\n", "duration -0.079196\n", "d3_kills -0.079586\n", "d5_kills -0.080044\n", "r3_deaths -0.081836\n", "radiant_tpscroll_count -0.081850\n", "r2_deaths -0.082092\n", "r5_deaths -0.085831\n", "r4_deaths -0.087646\n", "d3_gold -0.087860\n", "d4_gold -0.090525\n", "d5_gold -0.096247\n", "d1_gold -0.096250\n", "d2_gold -0.099091\n", "dire_boots_count -0.108562\n", "first_blood_player1 -0.131358\n", "first_blood_team -0.149884\n", "barracks_status_dire -0.690224\n", "tower_status_dire -0.695674\n", "dtype: float64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.corrwith(train['radiant_win']).sort_values(ascending=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Пробуем идеи и смотрим на качество на train" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Идея 1: если команда сделала first_blood (первая убила противника), она выиграет матч" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training accuracy = 0.5534\n" ] } ], "source": [ "def predict_first_blood(df):\n", " df['first_blood_prediction'] = (df.first_blood_team == 0.0).astype(int)\n", "\n", "predict_first_blood(train)\n", "print('Training accuracy = %0.4f' % (train['first_blood_prediction'] == train['radiant_win']).mean())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Но важнее AUC" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training ROC AUC score = 0.5578\n" ] } ], "source": [ "print('Training ROC AUC score = %0.4f' % roc_auc_score(train['radiant_win'], train['first_blood_prediction']))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Идея 2: если команда купила много ботинок, она вероятнее выиграет матч" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training ROC AUC score = 0.5662\n" ] } ], "source": [ "def predict_boots(df):\n", " df['boots_prediction'] = df.radiant_boots_count.astype(int)\n", "\n", "predict_boots(train)\n", "print('Training ROC AUC score = %0.4f' % roc_auc_score(train['radiant_win'], train['boots_prediction']))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Идея с ботинками оказалась лучше! Проверим качество на валидации" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Validation ROC AUC score = 0.5644\n", "Validation ROC AUC score = 0.5640\n" ] } ], "source": [ "predict_boots(validation)\n", "predict_first_blood(validation)\n", "print('Validation ROC AUC score = %0.4f' % roc_auc_score(validation['radiant_win'], validation['first_blood_prediction']))\n", "print('Validation ROC AUC score = %0.4f' % roc_auc_score(validation['radiant_win'], validation['boots_prediction']))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Гипотеза подтвердилась и на валидации. Засылаем!" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "predict_boots(test)\n", "test['radiant_win'] = test['boots_prediction']\n", "test[['match_id', 'radiant_win']].to_csv('prediction.csv', index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 1 }