Last active
January 4, 2017 16:31
-
-
Save elsherbini/306bcbfa8c9ee32d17fb229762ba066f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import cufflinks as cf\n", | |
"import plotly\n", | |
"import plotly.plotly as py\n", | |
"import plotly.graph_objs as go\n", | |
"import palettable\n", | |
"plotly.tools.set_credentials_file(username='elsherbini', api_key='REDACTED')" | |
] | |
}, { | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"lines = []\n", | |
"with open(\"result.csv\", \"r\") as f:\n", | |
" for l in f:\n", | |
" a, b, *c, d = l.split(\",\")\n", | |
" c = \",\".join(c)\n", | |
" d = d.rstrip()\n", | |
" lines.append([a,b,c,d])\n", | |
" \n", | |
"df = pd.DataFrame(lines, columns=[\"strain\", \"true_pop\", \"called_pop\", \"match\"])" | |
] | |
}, { | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [{ | |
"data": { | |
"text/html": [ | |
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~elsherbini/2734.embed\" height=\"587px\" width=\"1116px\"></iframe>" | |
], | |
"text/plain": [ | |
"<plotly.tools.PlotlyDisplay object>" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}], | |
"source": [ | |
"(1 - (df[df[\"true_pop\"] != df[\"called_pop\"]].groupby(\"true_pop\")[\"strain\"].count() / df[df[\"true_pop\"] == df[\"called_pop\"]].groupby(\"true_pop\")[\"strain\"].count()).fillna(0)).T.iplot(kind = \"bar\")" | |
] | |
}, { | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [{ | |
"data": { | |
"text/plain": [ | |
"{'Shewenella': array(['Shewenella', 'Vibrio ordalii', 'Vibrio lentus'], dtype=object),\n", | |
" 'Vibrio alginolyticus': array(['Vibrio alginolyticus'], dtype=object),\n", | |
" 'Vibrio breoganii': array(['Vibrio breoganii', 'Vibrio breoganii,Vibrio cyclitrophicus'], dtype=object),\n", | |
" 'Vibrio cyclitrophicus': array(['Vibrio cyclitrophicus',\n", | |
" 'Vibrio alginolyticus,Vibrio cyclitrophicus,Vibrio sp F13,Vibrio tasmaniensis',\n", | |
" 'Vibrio cyclitrophicus,Vibrio sp F13,Vibrio tasmaniensis',\n", | |
" 'Vibrio lentus'], dtype=object),\n", | |
" 'Vibrio kanaloae': array(['Vibrio kanaloae'], dtype=object),\n", | |
" 'Vibrio lentus': array(['Vibrio lentus', 'Vibrio kanaloae', 'Vibrio splendidus',\n", | |
" 'Vibrio tasmaniensis', 'Vibrio breoganii', 'Vibrio cyclitrophicus',\n", | |
" 'Vibrio sp F12'], dtype=object),\n", | |
" 'Vibrio sp 12E03': array(['Vibrio sp F13'], dtype=object),\n", | |
" 'Vibrio sp F12': array(['Vibrio sp F12'], dtype=object),\n", | |
" 'Vibrio sp F13': array(['Vibrio sp F13', 'Vibrio sp 12E03', 'Vibrio sp 12E03,Vibrio sp F12',\n", | |
" 'Vibrio sp F12', 'Vibrio splendidus',\n", | |
" 'Vibrio sp 12E03,Vibrio sp F12,Vibrio sp F13',\n", | |
" 'Vibrio cyclitrophicus'], dtype=object),\n", | |
" 'Vibrio splendidus': array(['Vibrio splendidus', 'Vibrio cyclitrophicus', 'Vibrio lentus',\n", | |
" 'Vibrio tasmaniensis', 'Vibrio splendidus,Vibrio splendidus F11'], dtype=object),\n", | |
" 'Vibrio splendidus F11': array(['Vibrio splendidus F11'], dtype=object),\n", | |
" 'Vibrio tasmaniensis': array(['Vibrio tasmaniensis', 'Vibrio breoganii', 'Vibrio lentus',\n", | |
" 'Vibrio alginolyticus,Vibrio tasmaniensis',\n", | |
" 'Vibrio splendidus,Vibrio tasmaniensis', 'Vibrio splendidus',\n", | |
" 'Vibrio cyclitrophicus'], dtype=object)}" | |
] | |
}, | |
"execution_count": 43, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}], | |
"source": [ | |
"d" | |
] | |
}, { | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/josephe/miniconda3/lib/python3.5/site-packages/cufflinks/plotlytools.py:123: FutureWarning:\n", | |
"\n", | |
"sort is deprecated, use sort_values(inplace=True) for INPLACE sorting\n", | |
"\n" | |
] | |
}, { | |
"data": { | |
"text/html": [ | |
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~elsherbini/2740.embed\" height=\"587px\" width=\"1116px\"></iframe>" | |
], | |
"text/plain": [ | |
"<plotly.tools.PlotlyDisplay object>" | |
] | |
}, | |
"execution_count": 55, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}], | |
"source": [ | |
"d = df.groupby(\"true_pop\").called_pop.unique().to_dict()\n", | |
"res = {}\n", | |
"for k, v in d.items():\n", | |
" for pop in v:\n", | |
" if k in res:\n", | |
" res[k][pop] = df[(df[\"true_pop\"]==k)&(df[\"called_pop\"]==pop)][\"strain\"].count()\n", | |
" else:\n", | |
" res[k] = {pop:df[(df[\"true_pop\"]==k)&(df[\"called_pop\"]==pop)][\"strain\"].count()}\n", | |
"mine = pd.DataFrame(res).T\n", | |
"mine.iplot(kind=\"bar\", barmode=\"stack\", sortbars=True)" | |
] | |
}], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment