Skip to content

Instantly share code, notes, and snippets.

@italo-batista
Created May 30, 2018 21:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save italo-batista/1f0cec3b7e67384edb597b72fae1a240 to your computer and use it in GitHub Desktop.
Save italo-batista/1f0cec3b7e67384edb597b72fae1a240 to your computer and use it in GitHub Desktop.
experimento_v2.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from scipy.special import comb\n",
"from scipy.special import perm\n",
"from pprint import pprint\n",
"from functools import reduce\n",
"from collections import Counter\n",
"import json\n",
"from __future__ import division\n",
"from sympy.interactive import printing\n",
"printing.init_printing()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Importando e filtrando dados:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"file_path = '../dados/cooked/epollog_tds.csv'\n",
"requests = pd.DataFrame(columns=['DATE', 'REQUEST_TYPE', 'REQUEST', 'MAJOR_VS'])\n",
"requests = pd.read_csv(file_path, delimiter=\";\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"post_request_regex = '(caso)/([a-zA-z0-9]*)'\n",
"get_request_regex = '(caso)/([0-9]*)/([a-z]){4}/([0-9]*)'\n",
"posts = requests.loc[\n",
" ((requests['REQUEST_TYPE'] == 'POST') & \n",
" (requests['REQUEST'].str.match(post_request_regex)))]\n",
"gets = requests.loc[\n",
" ((requests['REQUEST_TYPE'] == 'GET') & \n",
" (requests['REQUEST'].str.match(get_request_regex)))]\n",
"requests = posts.append(gets, ignore_index=True)\n",
"requests = requests.loc[(requests['MAJOR_VS'] != \"1.33.0\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Módulos auxiliares:"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"class Method(object):\n",
" \n",
" def __init__(self, name, class_name, params):\n",
" self.name = name\n",
" self.class_name = class_name\n",
" self.params = list(filter(lambda p: p != \"\", params))\n",
" self.id = self.__hash__() \n",
" self.prob = 0.5\n",
" \n",
" def reset_prob(self):\n",
" self.prob = 0.5\n",
" \n",
" def __str__(self):\n",
" return str(self.__dict__)\n",
"\n",
" def __eq__(self, other): \n",
" return self.__dict__ == other.__dict__\n",
" \n",
" def __hash__(self):\n",
" hashed_id = self.class_name + \".\" + self.name\n",
" return hash(hashed_id)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def load_version_impact_json(vs):\n",
" version_impact_path = '../dados/cooked/versionImpact/'\n",
" return json.load(open(version_impact_path + vs + '.json'))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def load_endpoints_json(vs):\n",
" endpoints_path = '../dados/cooked/endpoints/'\n",
" return json.load(open(endpoints_path + vs + '.json'))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def get_used_methods(endpoints_json, target_endpoint, rest_type):\n",
" \"\"\"\n",
" Given a json to map endpoints and its used methods to retrieve data, returns a list of \n",
" methods used to accomplish endpoint purpose. \n",
" \"\"\"\n",
" used_methods = []\n",
" for endpoint_obj in endpoints_json[\"endpointList\"]:\n",
" if target_endpoint == endpoint_obj[\"endpoint\"]:\n",
" if endpoint_obj[\"verb\"] == rest_type:\n",
" methods = endpoint_obj[\"impactedMethodsListOption\"]\n",
" for method in methods:\n",
" name = method[\"methodName\"]\n",
" params = method['parameters']\n",
" pkcg = method[\"classDeclaration\"].split(\".\")[-1]\n",
" method_obj = Method(name, pkcg, params)\n",
" used_methods.append(method_obj)\n",
" return used_methods"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def get_changed_methods(impacts_json):\n",
" \"\"\"\n",
" Returns the methods changed from the previous version to the current version indicate by impacts_json.\n",
" \"\"\"\n",
" methods_changed = []\n",
" for impacted_methods in impacts_json[\"impactList\"]:\n",
" pckg_full_string = impacted_methods[\"fileName\"]\n",
" pckg_clean = pckg_full_string.replace(\".java\", \"\")\n",
" class_name = pckg_clean.split(\"/\")[-1]\n",
" for method_name in impacted_methods[\"methods\"]:\n",
" methods_changed.append(class_name + \".\" + method_name)\n",
" return methods_changed"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Experimento:"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"VS_INDEX = 1\n",
"REQUEST_INDEX = 0 \n",
"last_rest_request=''\n",
"output = pd.DataFrame(columns=[\n",
" 'EDNPOINT', 'MAJOR_VS', 'TOTAL_CHANGED_METHODS', 'CHANGED_ENDPOINT_METHODS', 'PROB_OF_SUCCESS', 'TOTAL_METHODS'])\n",
"\n",
"iter_i = 0\n",
"for group, group_itens in requests.groupby(['REQUEST_TYPE', 'MAJOR_VS']):\n",
"\n",
" vs = group[VS_INDEX][:-2]\n",
" rest_request = group[REQUEST_INDEX]\n",
" n_requests = len(group_itens.get_values()) \n",
"\n",
" if last_rest_request != rest_request:\n",
" methods_dict = dict() \n",
" \n",
" endpoints = load_endpoints_json(vs) \n",
" methods_used_by_endpoint = get_used_methods(endpoints, '/caso', rest_request) \n",
" impacts = load_version_impact_json(vs)\n",
" changed_methods_name = get_changed_methods(impacts)\n",
" \n",
" # adding a new methods used by endpoint in dict\n",
" for method in methods_used_by_endpoint:\n",
" if method.id not in methods_dict:\n",
" methods_dict[method.id] = method\n",
" \n",
" changed_methods_not_used = 0\n",
" # for methods changed by curr vs, reset success prob\n",
" for method_name in changed_methods_name: \n",
" method_id = hash(method_name)\n",
" if method_id not in methods_dict:\n",
" changed_methods_not_used += 1\n",
" continue\n",
" method = methods_dict[method_id]\n",
" method.reset_prob() \n",
"\n",
" # for methods used in endpoint, get their history in methods_dict\n",
" for i in range(len(methods_used_by_endpoint)):\n",
" method_id = methods_used_by_endpoint[i].id\n",
" methods_used_by_endpoint[i] = methods_dict[method_id]\n",
" #print(methods_dict[method_id].prob)\n",
" \n",
" # calc endpoint prob of success\n",
" initializer = 0\n",
" iterable = methods_used_by_endpoint\n",
" sum_probs = reduce(lambda sum_prob, method: sum_prob + method.prob, \n",
" iterable, \n",
" initializer)\n",
" prob = sum_probs / float(len(methods_used_by_endpoint))\n",
" \n",
" # update methods probs\n",
" for method in methods_used_by_endpoint:\n",
" new_prob = (method.prob + 1) / 2.0\n",
" method.prob = new_prob\n",
" methods_dict[method.id] = method \n",
"\n",
" output.loc[iter_i] = [rest_request, vs, len(changed_methods_name), len(changed_methods_name) - changed_methods_not_used, round(prob, 5), len(methods_used_by_endpoint)]\n",
" iter_i += 1\n",
" \n",
" last_rest_request = rest_request"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>EDNPOINT</th>\n",
" <th>MAJOR_VS</th>\n",
" <th>TOTAL_CHANGED_METHODS</th>\n",
" <th>CHANGED_ENDPOINT_METHODS</th>\n",
" <th>PROB_OF_SUCCESS</th>\n",
" <th>TOTAL_METHODS</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>GET</td>\n",
" <td>1.24</td>\n",
" <td>128</td>\n",
" <td>0</td>\n",
" <td>0.50000</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>GET</td>\n",
" <td>1.25</td>\n",
" <td>563</td>\n",
" <td>0</td>\n",
" <td>0.75000</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>GET</td>\n",
" <td>1.26</td>\n",
" <td>228</td>\n",
" <td>0</td>\n",
" <td>0.87500</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>GET</td>\n",
" <td>1.27</td>\n",
" <td>106</td>\n",
" <td>0</td>\n",
" <td>0.93750</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>GET</td>\n",
" <td>1.28</td>\n",
" <td>1224</td>\n",
" <td>3</td>\n",
" <td>0.92969</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>GET</td>\n",
" <td>1.29</td>\n",
" <td>213</td>\n",
" <td>0</td>\n",
" <td>0.96484</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>GET</td>\n",
" <td>1.30</td>\n",
" <td>710</td>\n",
" <td>2</td>\n",
" <td>0.94595</td>\n",
" <td>37</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>GET</td>\n",
" <td>1.31</td>\n",
" <td>393</td>\n",
" <td>2</td>\n",
" <td>0.97338</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>GET</td>\n",
" <td>1.32</td>\n",
" <td>393</td>\n",
" <td>1</td>\n",
" <td>0.96824</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>POST</td>\n",
" <td>1.24</td>\n",
" <td>128</td>\n",
" <td>1</td>\n",
" <td>0.50000</td>\n",
" <td>272</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>POST</td>\n",
" <td>1.25</td>\n",
" <td>563</td>\n",
" <td>4</td>\n",
" <td>0.75246</td>\n",
" <td>273</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>POST</td>\n",
" <td>1.26</td>\n",
" <td>228</td>\n",
" <td>4</td>\n",
" <td>0.87133</td>\n",
" <td>273</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>POST</td>\n",
" <td>1.27</td>\n",
" <td>106</td>\n",
" <td>2</td>\n",
" <td>0.93118</td>\n",
" <td>272</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>POST</td>\n",
" <td>1.28</td>\n",
" <td>1224</td>\n",
" <td>18</td>\n",
" <td>0.91566</td>\n",
" <td>288</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>POST</td>\n",
" <td>1.29</td>\n",
" <td>213</td>\n",
" <td>0</td>\n",
" <td>0.96212</td>\n",
" <td>288</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>POST</td>\n",
" <td>1.30</td>\n",
" <td>710</td>\n",
" <td>10</td>\n",
" <td>0.89524</td>\n",
" <td>341</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>POST</td>\n",
" <td>1.31</td>\n",
" <td>393</td>\n",
" <td>9</td>\n",
" <td>0.93803</td>\n",
" <td>340</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>POST</td>\n",
" <td>1.32</td>\n",
" <td>393</td>\n",
" <td>10</td>\n",
" <td>0.94741</td>\n",
" <td>347</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" EDNPOINT MAJOR_VS TOTAL_CHANGED_METHODS CHANGED_ENDPOINT_METHODS \\\n",
"0 GET 1.24 128 0 \n",
"1 GET 1.25 563 0 \n",
"2 GET 1.26 228 0 \n",
"3 GET 1.27 106 0 \n",
"4 GET 1.28 1224 3 \n",
"5 GET 1.29 213 0 \n",
"6 GET 1.30 710 2 \n",
"7 GET 1.31 393 2 \n",
"8 GET 1.32 393 1 \n",
"9 POST 1.24 128 1 \n",
"10 POST 1.25 563 4 \n",
"11 POST 1.26 228 4 \n",
"12 POST 1.27 106 2 \n",
"13 POST 1.28 1224 18 \n",
"14 POST 1.29 213 0 \n",
"15 POST 1.30 710 10 \n",
"16 POST 1.31 393 9 \n",
"17 POST 1.32 393 10 \n",
"\n",
" PROB_OF_SUCCESS TOTAL_METHODS \n",
"0 0.50000 36 \n",
"1 0.75000 36 \n",
"2 0.87500 36 \n",
"3 0.93750 36 \n",
"4 0.92969 36 \n",
"5 0.96484 36 \n",
"6 0.94595 37 \n",
"7 0.97338 27 \n",
"8 0.96824 27 \n",
"9 0.50000 272 \n",
"10 0.75246 273 \n",
"11 0.87133 273 \n",
"12 0.93118 272 \n",
"13 0.91566 288 \n",
"14 0.96212 288 \n",
"15 0.89524 341 \n",
"16 0.93803 340 \n",
"17 0.94741 347 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"file_name=\"experimento_output.csv\"\n",
"output.to_csv(file_name, sep=';', encoding='utf-8', index=False)\n",
"output"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Visão geral: \n",
"\n",
"Na primeira vs, não se tem nenhum dado passado. E cada endpoint tem probabilidade 0.5 de sucesso. \n",
"Na próxima versão, você recomputa as probabilidades do endpoint:\n",
" - se o método não teve mudança, então a nova probabilidade de sucesso dele é new = (old + 1) / 2\n",
" - se um método tiver sido alterado, você deve resetar sua probabilidade para 0.5\n",
" - a probabilidade do endpoint vai ser a soma das probabilidades dos métodos dividido pela quantidade dos métodos \n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment