italo-batista · May 30, 2018 21:30
diff --git a/experimento_v2.ipynb b/experimento_v2.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.special import comb\n",
    "from scipy.special import perm\n",
    "from pprint import pprint\n",
    "from functools import reduce\n",
    "from collections import Counter\n",
    "import json\n",
    "from __future__ import division\n",
    "from sympy.interactive import printing\n",
    "printing.init_printing()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Importando e filtrando dados:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = '../dados/cooked/epollog_tds.csv'\n",
    "requests = pd.DataFrame(columns=['DATE', 'REQUEST_TYPE', 'REQUEST', 'MAJOR_VS'])\n",
    "requests = pd.read_csv(file_path, delimiter=\";\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "post_request_regex = '(caso)/([a-zA-z0-9]*)'\n",
    "get_request_regex = '(caso)/([0-9]*)/([a-z]){4}/([0-9]*)'\n",
    "posts = requests.loc[\n",
    "    ((requests['REQUEST_TYPE'] == 'POST') & \n",
    "     (requests['REQUEST'].str.match(post_request_regex)))]\n",
    "gets = requests.loc[\n",
    "    ((requests['REQUEST_TYPE'] == 'GET') & \n",
    "     (requests['REQUEST'].str.match(get_request_regex)))]\n",
    "requests = posts.append(gets, ignore_index=True)\n",
    "requests = requests.loc[(requests['MAJOR_VS'] != \"1.33.0\")]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Módulos auxiliares:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Method(object):\n",
    "    \n",
    "    def __init__(self, name, class_name, params):\n",
    "        self.name = name\n",
    "        self.class_name = class_name\n",
    "        self.params = list(filter(lambda p: p != \"\", params))\n",
    "        self.id = self.__hash__()        \n",
    "        self.prob = 0.5\n",
    "        \n",
    "    def reset_prob(self):\n",
    "        self.prob = 0.5\n",
    "          \n",
    "    def __str__(self):\n",
    "        return str(self.__dict__)\n",
    "\n",
    "    def __eq__(self, other): \n",
    "        return self.__dict__ == other.__dict__\n",
    "        \n",
    "    def __hash__(self):\n",
    "        hashed_id = self.class_name + \".\" + self.name\n",
    "        return hash(hashed_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_version_impact_json(vs):\n",
    "    version_impact_path = '../dados/cooked/versionImpact/'\n",
    "    return json.load(open(version_impact_path + vs + '.json'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_endpoints_json(vs):\n",
    "    endpoints_path = '../dados/cooked/endpoints/'\n",
    "    return json.load(open(endpoints_path + vs + '.json'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_used_methods(endpoints_json, target_endpoint, rest_type):\n",
    "    \"\"\"\n",
    "        Given a json to map endpoints and its used methods to retrieve data, returns a list of \n",
    "        methods used to accomplish endpoint purpose. \n",
    "    \"\"\"\n",
    "    used_methods = []\n",
    "    for endpoint_obj in endpoints_json[\"endpointList\"]:\n",
    "        if target_endpoint == endpoint_obj[\"endpoint\"]:\n",
    "            if endpoint_obj[\"verb\"] == rest_type:\n",
    "                methods = endpoint_obj[\"impactedMethodsListOption\"]\n",
    "                for method in methods:\n",
    "                    name = method[\"methodName\"]\n",
    "                    params = method['parameters']\n",
    "                    pkcg = method[\"classDeclaration\"].split(\".\")[-1]\n",
    "                    method_obj = Method(name, pkcg, params)\n",
    "                    used_methods.append(method_obj)\n",
    "    return used_methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_changed_methods(impacts_json):\n",
    "    \"\"\"\n",
    "        Returns the methods changed from the previous version to the current version indicate by impacts_json.\n",
    "    \"\"\"\n",
    "    methods_changed = []\n",
    "    for impacted_methods in impacts_json[\"impactList\"]:\n",
    "        pckg_full_string = impacted_methods[\"fileName\"]\n",
    "        pckg_clean = pckg_full_string.replace(\".java\", \"\")\n",
    "        class_name = pckg_clean.split(\"/\")[-1]\n",
    "        for method_name in impacted_methods[\"methods\"]:\n",
    "            methods_changed.append(class_name + \".\" + method_name)\n",
    "    return methods_changed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Experimento:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "VS_INDEX = 1\n",
    "REQUEST_INDEX = 0    \n",
    "last_rest_request=''\n",
    "output = pd.DataFrame(columns=[\n",
    "    'EDNPOINT', 'MAJOR_VS', 'TOTAL_CHANGED_METHODS', 'CHANGED_ENDPOINT_METHODS', 'PROB_OF_SUCCESS', 'TOTAL_METHODS'])\n",
    "\n",
    "iter_i = 0\n",
    "for group, group_itens in requests.groupby(['REQUEST_TYPE', 'MAJOR_VS']):\n",
    "\n",
    "    vs = group[VS_INDEX][:-2]\n",
    "    rest_request = group[REQUEST_INDEX]\n",
    "    n_requests = len(group_itens.get_values())     \n",
    "\n",
    "    if last_rest_request != rest_request:\n",
    "        methods_dict = dict()    \n",
    "    \n",
    "    endpoints = load_endpoints_json(vs)                                \n",
    "    methods_used_by_endpoint = get_used_methods(endpoints, '/caso', rest_request) \n",
    "    impacts = load_version_impact_json(vs)\n",
    "    changed_methods_name = get_changed_methods(impacts)\n",
    "                \n",
    "    # adding a new methods used by endpoint in dict\n",
    "    for method in methods_used_by_endpoint:\n",
    "        if method.id not in methods_dict:\n",
    "            methods_dict[method.id] = method\n",
    "                \n",
    "    changed_methods_not_used = 0\n",
    "    # for methods changed by curr vs, reset success prob\n",
    "    for method_name in changed_methods_name:            \n",
    "        method_id = hash(method_name)\n",
    "        if method_id not in methods_dict:\n",
    "            changed_methods_not_used += 1\n",
    "            continue\n",
    "        method = methods_dict[method_id]\n",
    "        method.reset_prob()            \n",
    "\n",
    "    # for methods used in endpoint, get their history in methods_dict\n",
    "    for i in range(len(methods_used_by_endpoint)):\n",
    "        method_id = methods_used_by_endpoint[i].id\n",
    "        methods_used_by_endpoint[i] = methods_dict[method_id]\n",
    "        #print(methods_dict[method_id].prob)\n",
    "        \n",
    "    # calc endpoint prob of success\n",
    "    initializer = 0\n",
    "    iterable = methods_used_by_endpoint\n",
    "    sum_probs = reduce(lambda sum_prob, method: sum_prob + method.prob, \n",
    "                       iterable, \n",
    "                       initializer)\n",
    "    prob = sum_probs / float(len(methods_used_by_endpoint))\n",
    "        \n",
    "    # update methods probs\n",
    "    for method in methods_used_by_endpoint:\n",
    "        new_prob = (method.prob + 1) / 2.0\n",
    "        method.prob = new_prob\n",
    "        methods_dict[method.id] = method            \n",
    "\n",
    "    output.loc[iter_i] = [rest_request, vs, len(changed_methods_name), len(changed_methods_name) - changed_methods_not_used, round(prob, 5), len(methods_used_by_endpoint)]\n",
    "    iter_i += 1\n",
    "        \n",
    "    last_rest_request = rest_request"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>EDNPOINT</th>\n",
       "      <th>MAJOR_VS</th>\n",
       "      <th>TOTAL_CHANGED_METHODS</th>\n",
       "      <th>CHANGED_ENDPOINT_METHODS</th>\n",
       "      <th>PROB_OF_SUCCESS</th>\n",
       "      <th>TOTAL_METHODS</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GET</td>\n",
       "      <td>1.24</td>\n",
       "      <td>128</td>\n",
       "      <td>0</td>\n",
       "      <td>0.50000</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GET</td>\n",
       "      <td>1.25</td>\n",
       "      <td>563</td>\n",
       "      <td>0</td>\n",
       "      <td>0.75000</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>GET</td>\n",
       "      <td>1.26</td>\n",
       "      <td>228</td>\n",
       "      <td>0</td>\n",
       "      <td>0.87500</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>GET</td>\n",
       "      <td>1.27</td>\n",
       "      <td>106</td>\n",
       "      <td>0</td>\n",
       "      <td>0.93750</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>GET</td>\n",
       "      <td>1.28</td>\n",
       "      <td>1224</td>\n",
       "      <td>3</td>\n",
       "      <td>0.92969</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GET</td>\n",
       "      <td>1.29</td>\n",
       "      <td>213</td>\n",
       "      <td>0</td>\n",
       "      <td>0.96484</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>GET</td>\n",
       "      <td>1.30</td>\n",
       "      <td>710</td>\n",
       "      <td>2</td>\n",
       "      <td>0.94595</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>GET</td>\n",
       "      <td>1.31</td>\n",
       "      <td>393</td>\n",
       "      <td>2</td>\n",
       "      <td>0.97338</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>GET</td>\n",
       "      <td>1.32</td>\n",
       "      <td>393</td>\n",
       "      <td>1</td>\n",
       "      <td>0.96824</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>POST</td>\n",
       "      <td>1.24</td>\n",
       "      <td>128</td>\n",
       "      <td>1</td>\n",
       "      <td>0.50000</td>\n",
       "      <td>272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>POST</td>\n",
       "      <td>1.25</td>\n",
       "      <td>563</td>\n",
       "      <td>4</td>\n",
       "      <td>0.75246</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>POST</td>\n",
       "      <td>1.26</td>\n",
       "      <td>228</td>\n",
       "      <td>4</td>\n",
       "      <td>0.87133</td>\n",
       "      <td>273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>POST</td>\n",
       "      <td>1.27</td>\n",
       "      <td>106</td>\n",
       "      <td>2</td>\n",
       "      <td>0.93118</td>\n",
       "      <td>272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>POST</td>\n",
       "      <td>1.28</td>\n",
       "      <td>1224</td>\n",
       "      <td>18</td>\n",
       "      <td>0.91566</td>\n",
       "      <td>288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>POST</td>\n",
       "      <td>1.29</td>\n",
       "      <td>213</td>\n",
       "      <td>0</td>\n",
       "      <td>0.96212</td>\n",
       "      <td>288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>POST</td>\n",
       "      <td>1.30</td>\n",
       "      <td>710</td>\n",
       "      <td>10</td>\n",
       "      <td>0.89524</td>\n",
       "      <td>341</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>POST</td>\n",
       "      <td>1.31</td>\n",
       "      <td>393</td>\n",
       "      <td>9</td>\n",
       "      <td>0.93803</td>\n",
       "      <td>340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>POST</td>\n",
       "      <td>1.32</td>\n",
       "      <td>393</td>\n",
       "      <td>10</td>\n",
       "      <td>0.94741</td>\n",
       "      <td>347</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   EDNPOINT MAJOR_VS TOTAL_CHANGED_METHODS CHANGED_ENDPOINT_METHODS  \\\n",
       "0       GET     1.24                   128                        0   \n",
       "1       GET     1.25                   563                        0   \n",
       "2       GET     1.26                   228                        0   \n",
       "3       GET     1.27                   106                        0   \n",
       "4       GET     1.28                  1224                        3   \n",
       "5       GET     1.29                   213                        0   \n",
       "6       GET     1.30                   710                        2   \n",
       "7       GET     1.31                   393                        2   \n",
       "8       GET     1.32                   393                        1   \n",
       "9      POST     1.24                   128                        1   \n",
       "10     POST     1.25                   563                        4   \n",
       "11     POST     1.26                   228                        4   \n",
       "12     POST     1.27                   106                        2   \n",
       "13     POST     1.28                  1224                       18   \n",
       "14     POST     1.29                   213                        0   \n",
       "15     POST     1.30                   710                       10   \n",
       "16     POST     1.31                   393                        9   \n",
       "17     POST     1.32                   393                       10   \n",
       "\n",
       "    PROB_OF_SUCCESS TOTAL_METHODS  \n",
       "0           0.50000            36  \n",
       "1           0.75000            36  \n",
       "2           0.87500            36  \n",
       "3           0.93750            36  \n",
       "4           0.92969            36  \n",
       "5           0.96484            36  \n",
       "6           0.94595            37  \n",
       "7           0.97338            27  \n",
       "8           0.96824            27  \n",
       "9           0.50000           272  \n",
       "10          0.75246           273  \n",
       "11          0.87133           273  \n",
       "12          0.93118           272  \n",
       "13          0.91566           288  \n",
       "14          0.96212           288  \n",
       "15          0.89524           341  \n",
       "16          0.93803           340  \n",
       "17          0.94741           347  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_name=\"experimento_output.csv\"\n",
    "output.to_csv(file_name, sep=';', encoding='utf-8', index=False)\n",
    "output"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Visão geral:  \n",
    "\n",
    "Na primeira vs, não se tem nenhum dado passado. E cada endpoint tem probabilidade 0.5 de sucesso.   \n",
    "Na próxima versão, você recomputa as probabilidades do endpoint:\n",
    "    - se o método não teve mudança, então a nova probabilidade de sucesso dele é new = (old + 1) / 2\n",
    "    - se um método tiver sido alterado, você deve resetar sua probabilidade para 0.5\n",
    "    - a probabilidade do endpoint vai ser a soma das probabilidades dos métodos dividido pela quantidade dos métodos \n",
    "      "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import numpy as np\n",
	"from scipy.special import comb\n",
	"from scipy.special import perm\n",
	"from pprint import pprint\n",
	"from functools import reduce\n",
	"from collections import Counter\n",
	"import json\n",
	"from __future__ import division\n",
	"from sympy.interactive import printing\n",
	"printing.init_printing()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Importando e filtrando dados:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"file_path = '../dados/cooked/epollog_tds.csv'\n",
	"requests = pd.DataFrame(columns=['DATE', 'REQUEST_TYPE', 'REQUEST', 'MAJOR_VS'])\n",
	"requests = pd.read_csv(file_path, delimiter=\";\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"post_request_regex = '(caso)/([a-zA-z0-9]*)'\n",
	"get_request_regex = '(caso)/([0-9])/([a-z]){4}/([0-9])'\n",
	"posts = requests.loc[\n",
	" ((requests['REQUEST_TYPE'] == 'POST') & \n",
	" (requests['REQUEST'].str.match(post_request_regex)))]\n",
	"gets = requests.loc[\n",
	" ((requests['REQUEST_TYPE'] == 'GET') & \n",
	" (requests['REQUEST'].str.match(get_request_regex)))]\n",
	"requests = posts.append(gets, ignore_index=True)\n",
	"requests = requests.loc[(requests['MAJOR_VS'] != \"1.33.0\")]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Módulos auxiliares:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"class Method(object):\n",
	" \n",
	" def __init__(self, name, class_name, params):\n",
	" self.name = name\n",
	" self.class_name = class_name\n",
	" self.params = list(filter(lambda p: p != \"\", params))\n",
	" self.id = self.__hash__() \n",
	" self.prob = 0.5\n",
	" \n",
	" def reset_prob(self):\n",
	" self.prob = 0.5\n",
	" \n",
	" def __str__(self):\n",
	" return str(self.__dict__)\n",
	"\n",
	" def __eq__(self, other): \n",
	" return self.__dict__ == other.__dict__\n",
	" \n",
	" def __hash__(self):\n",
	" hashed_id = self.class_name + \".\" + self.name\n",
	" return hash(hashed_id)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [],
	"source": [
	"def load_version_impact_json(vs):\n",
	" version_impact_path = '../dados/cooked/versionImpact/'\n",
	" return json.load(open(version_impact_path + vs + '.json'))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [],
	"source": [
	"def load_endpoints_json(vs):\n",
	" endpoints_path = '../dados/cooked/endpoints/'\n",
	" return json.load(open(endpoints_path + vs + '.json'))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [],
	"source": [
	"def get_used_methods(endpoints_json, target_endpoint, rest_type):\n",
	" \"\"\"\n",
	" Given a json to map endpoints and its used methods to retrieve data, returns a list of \n",
	" methods used to accomplish endpoint purpose. \n",
	" \"\"\"\n",
	" used_methods = []\n",
	" for endpoint_obj in endpoints_json[\"endpointList\"]:\n",
	" if target_endpoint == endpoint_obj[\"endpoint\"]:\n",
	" if endpoint_obj[\"verb\"] == rest_type:\n",
	" methods = endpoint_obj[\"impactedMethodsListOption\"]\n",
	" for method in methods:\n",
	" name = method[\"methodName\"]\n",
	" params = method['parameters']\n",
	" pkcg = method[\"classDeclaration\"].split(\".\")[-1]\n",
	" method_obj = Method(name, pkcg, params)\n",
	" used_methods.append(method_obj)\n",
	" return used_methods"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [],
	"source": [
	"def get_changed_methods(impacts_json):\n",
	" \"\"\"\n",
	" Returns the methods changed from the previous version to the current version indicate by impacts_json.\n",
	" \"\"\"\n",
	" methods_changed = []\n",
	" for impacted_methods in impacts_json[\"impactList\"]:\n",
	" pckg_full_string = impacted_methods[\"fileName\"]\n",
	" pckg_clean = pckg_full_string.replace(\".java\", \"\")\n",
	" class_name = pckg_clean.split(\"/\")[-1]\n",
	" for method_name in impacted_methods[\"methods\"]:\n",
	" methods_changed.append(class_name + \".\" + method_name)\n",
	" return methods_changed"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Experimento:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 28,
	"metadata": {},
	"outputs": [],
	"source": [
	"VS_INDEX = 1\n",
	"REQUEST_INDEX = 0 \n",
	"last_rest_request=''\n",
	"output = pd.DataFrame(columns=[\n",
	" 'EDNPOINT', 'MAJOR_VS', 'TOTAL_CHANGED_METHODS', 'CHANGED_ENDPOINT_METHODS', 'PROB_OF_SUCCESS', 'TOTAL_METHODS'])\n",
	"\n",
	"iter_i = 0\n",
	"for group, group_itens in requests.groupby(['REQUEST_TYPE', 'MAJOR_VS']):\n",
	"\n",
	" vs = group[VS_INDEX][:-2]\n",
	" rest_request = group[REQUEST_INDEX]\n",
	" n_requests = len(group_itens.get_values()) \n",
	"\n",
	" if last_rest_request != rest_request:\n",
	" methods_dict = dict() \n",
	" \n",
	" endpoints = load_endpoints_json(vs) \n",
	" methods_used_by_endpoint = get_used_methods(endpoints, '/caso', rest_request) \n",
	" impacts = load_version_impact_json(vs)\n",
	" changed_methods_name = get_changed_methods(impacts)\n",
	" \n",
	" # adding a new methods used by endpoint in dict\n",
	" for method in methods_used_by_endpoint:\n",
	" if method.id not in methods_dict:\n",
	" methods_dict[method.id] = method\n",
	" \n",
	" changed_methods_not_used = 0\n",
	" # for methods changed by curr vs, reset success prob\n",
	" for method_name in changed_methods_name: \n",
	" method_id = hash(method_name)\n",
	" if method_id not in methods_dict:\n",
	" changed_methods_not_used += 1\n",
	" continue\n",
	" method = methods_dict[method_id]\n",
	" method.reset_prob() \n",
	"\n",
	" # for methods used in endpoint, get their history in methods_dict\n",
	" for i in range(len(methods_used_by_endpoint)):\n",
	" method_id = methods_used_by_endpoint[i].id\n",
	" methods_used_by_endpoint[i] = methods_dict[method_id]\n",
	" #print(methods_dict[method_id].prob)\n",
	" \n",
	" # calc endpoint prob of success\n",
	" initializer = 0\n",
	" iterable = methods_used_by_endpoint\n",
	" sum_probs = reduce(lambda sum_prob, method: sum_prob + method.prob, \n",
	" iterable, \n",
	" initializer)\n",
	" prob = sum_probs / float(len(methods_used_by_endpoint))\n",
	" \n",
	" # update methods probs\n",
	" for method in methods_used_by_endpoint:\n",
	" new_prob = (method.prob + 1) / 2.0\n",
	" method.prob = new_prob\n",
	" methods_dict[method.id] = method \n",
	"\n",
	" output.loc[iter_i] = [rest_request, vs, len(changed_methods_name), len(changed_methods_name) - changed_methods_not_used, round(prob, 5), len(methods_used_by_endpoint)]\n",
	" iter_i += 1\n",
	" \n",
	" last_rest_request = rest_request"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>EDNPOINT</th>\n",
	" <th>MAJOR_VS</th>\n",
	" <th>TOTAL_CHANGED_METHODS</th>\n",
	" <th>CHANGED_ENDPOINT_METHODS</th>\n",
	" <th>PROB_OF_SUCCESS</th>\n",
	" <th>TOTAL_METHODS</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>GET</td>\n",
	" <td>1.24</td>\n",
	" <td>128</td>\n",
	" <td>0</td>\n",
	" <td>0.50000</td>\n",
	" <td>36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>GET</td>\n",
	" <td>1.25</td>\n",
	" <td>563</td>\n",
	" <td>0</td>\n",
	" <td>0.75000</td>\n",
	" <td>36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>GET</td>\n",
	" <td>1.26</td>\n",
	" <td>228</td>\n",
	" <td>0</td>\n",
	" <td>0.87500</td>\n",
	" <td>36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>GET</td>\n",
	" <td>1.27</td>\n",
	" <td>106</td>\n",
	" <td>0</td>\n",
	" <td>0.93750</td>\n",
	" <td>36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>GET</td>\n",
	" <td>1.28</td>\n",
	" <td>1224</td>\n",
	" <td>3</td>\n",
	" <td>0.92969</td>\n",
	" <td>36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>GET</td>\n",
	" <td>1.29</td>\n",
	" <td>213</td>\n",
	" <td>0</td>\n",
	" <td>0.96484</td>\n",
	" <td>36</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <td>GET</td>\n",
	" <td>1.30</td>\n",
	" <td>710</td>\n",
	" <td>2</td>\n",
	" <td>0.94595</td>\n",
	" <td>37</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <td>GET</td>\n",
	" <td>1.31</td>\n",
	" <td>393</td>\n",
	" <td>2</td>\n",
	" <td>0.97338</td>\n",
	" <td>27</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8</th>\n",
	" <td>GET</td>\n",
	" <td>1.32</td>\n",
	" <td>393</td>\n",
	" <td>1</td>\n",
	" <td>0.96824</td>\n",
	" <td>27</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9</th>\n",
	" <td>POST</td>\n",
	" <td>1.24</td>\n",
	" <td>128</td>\n",
	" <td>1</td>\n",
	" <td>0.50000</td>\n",
	" <td>272</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>10</th>\n",
	" <td>POST</td>\n",
	" <td>1.25</td>\n",
	" <td>563</td>\n",
	" <td>4</td>\n",
	" <td>0.75246</td>\n",
	" <td>273</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>11</th>\n",
	" <td>POST</td>\n",
	" <td>1.26</td>\n",
	" <td>228</td>\n",
	" <td>4</td>\n",
	" <td>0.87133</td>\n",
	" <td>273</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>12</th>\n",
	" <td>POST</td>\n",
	" <td>1.27</td>\n",
	" <td>106</td>\n",
	" <td>2</td>\n",
	" <td>0.93118</td>\n",
	" <td>272</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>13</th>\n",
	" <td>POST</td>\n",
	" <td>1.28</td>\n",
	" <td>1224</td>\n",
	" <td>18</td>\n",
	" <td>0.91566</td>\n",
	" <td>288</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>14</th>\n",
	" <td>POST</td>\n",
	" <td>1.29</td>\n",
	" <td>213</td>\n",
	" <td>0</td>\n",
	" <td>0.96212</td>\n",
	" <td>288</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>15</th>\n",
	" <td>POST</td>\n",
	" <td>1.30</td>\n",
	" <td>710</td>\n",
	" <td>10</td>\n",
	" <td>0.89524</td>\n",
	" <td>341</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>16</th>\n",
	" <td>POST</td>\n",
	" <td>1.31</td>\n",
	" <td>393</td>\n",
	" <td>9</td>\n",
	" <td>0.93803</td>\n",
	" <td>340</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>17</th>\n",
	" <td>POST</td>\n",
	" <td>1.32</td>\n",
	" <td>393</td>\n",
	" <td>10</td>\n",
	" <td>0.94741</td>\n",
	" <td>347</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" EDNPOINT MAJOR_VS TOTAL_CHANGED_METHODS CHANGED_ENDPOINT_METHODS \\\n",
	"0 GET 1.24 128 0 \n",
	"1 GET 1.25 563 0 \n",
	"2 GET 1.26 228 0 \n",
	"3 GET 1.27 106 0 \n",
	"4 GET 1.28 1224 3 \n",
	"5 GET 1.29 213 0 \n",
	"6 GET 1.30 710 2 \n",
	"7 GET 1.31 393 2 \n",
	"8 GET 1.32 393 1 \n",
	"9 POST 1.24 128 1 \n",
	"10 POST 1.25 563 4 \n",
	"11 POST 1.26 228 4 \n",
	"12 POST 1.27 106 2 \n",
	"13 POST 1.28 1224 18 \n",
	"14 POST 1.29 213 0 \n",
	"15 POST 1.30 710 10 \n",
	"16 POST 1.31 393 9 \n",
	"17 POST 1.32 393 10 \n",
	"\n",
	" PROB_OF_SUCCESS TOTAL_METHODS \n",
	"0 0.50000 36 \n",
	"1 0.75000 36 \n",
	"2 0.87500 36 \n",
	"3 0.93750 36 \n",
	"4 0.92969 36 \n",
	"5 0.96484 36 \n",
	"6 0.94595 37 \n",
	"7 0.97338 27 \n",
	"8 0.96824 27 \n",
	"9 0.50000 272 \n",
	"10 0.75246 273 \n",
	"11 0.87133 273 \n",
	"12 0.93118 272 \n",
	"13 0.91566 288 \n",
	"14 0.96212 288 \n",
	"15 0.89524 341 \n",
	"16 0.93803 340 \n",
	"17 0.94741 347 "
	]
	},
	"execution_count": 29,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"file_name=\"experimento_output.csv\"\n",
	"output.to_csv(file_name, sep=';', encoding='utf-8', index=False)\n",
	"output"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Visão geral: \n",
	"\n",
	"Na primeira vs, não se tem nenhum dado passado. E cada endpoint tem probabilidade 0.5 de sucesso. \n",
	"Na próxima versão, você recomputa as probabilidades do endpoint:\n",
	" - se o método não teve mudança, então a nova probabilidade de sucesso dele é new = (old + 1) / 2\n",
	" - se um método tiver sido alterado, você deve resetar sua probabilidade para 0.5\n",
	" - a probabilidade do endpoint vai ser a soma das probabilidades dos métodos dividido pela quantidade dos métodos \n",
	" "
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}