Skip to content

Instantly share code, notes, and snippets.

@elsherbini
Last active January 4, 2017 16:31
Show Gist options
  • Save elsherbini/306bcbfa8c9ee32d17fb229762ba066f to your computer and use it in GitHub Desktop.
Save elsherbini/306bcbfa8c9ee32d17fb229762ba066f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"import cufflinks as cf\n",
"import plotly\n",
"import plotly.plotly as py\n",
"import plotly.graph_objs as go\n",
"import palettable\n",
"\n",
"# Read the Plotly API key from the environment so the secret is never stored\n",
"# in the notebook; a missing PLOTLY_API_KEY fails loudly with a KeyError.\n",
"plotly.tools.set_credentials_file(username='elsherbini', api_key=os.environ['PLOTLY_API_KEY'])"
]
}, {
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Parse result.csv by hand: called_pop can itself contain commas (several\n",
"# populations called for one strain), so every field between the second and\n",
"# the last one is re-joined into a single called_pop value.\n",
"records = []\n",
"with open(\"result.csv\", \"r\") as f:\n",
"    for line in f:\n",
"        if not line.strip():\n",
"            continue  # a blank line has no fields; skip it instead of failing to unpack\n",
"        strain, true_pop, *called, match = line.split(\",\")\n",
"        # Only the last field carries the line terminator, so strip it there.\n",
"        records.append([strain, true_pop, \",\".join(called), match.rstrip()])\n",
"\n",
"df = pd.DataFrame(records, columns=[\"strain\", \"true_pop\", \"called_pop\", \"match\"])"
]
}, {
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~elsherbini/2734.embed\" height=\"587px\" width=\"1116px\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}],
"source": [
"# Per-population accuracy: the share of each true population's strains whose\n",
"# called population is exactly the true one. Taking a grouped mean keeps a\n",
"# population that was never called correctly at 0 (a ratio of mismatch to match\n",
"# counts followed by fillna(0) would report it as 1) and bounds values to [0, 1].\n",
"is_correct = (df[\"true_pop\"] == df[\"called_pop\"]).astype(int)\n",
"accuracy_by_pop = is_correct.groupby(df[\"true_pop\"]).mean()\n",
"accuracy_by_pop.iplot(kind=\"bar\")"
]
}, {
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [{
"data": {
"text/plain": [
"{'Shewenella': array(['Shewenella', 'Vibrio ordalii', 'Vibrio lentus'], dtype=object),\n",
" 'Vibrio alginolyticus': array(['Vibrio alginolyticus'], dtype=object),\n",
" 'Vibrio breoganii': array(['Vibrio breoganii', 'Vibrio breoganii,Vibrio cyclitrophicus'], dtype=object),\n",
" 'Vibrio cyclitrophicus': array(['Vibrio cyclitrophicus',\n",
" 'Vibrio alginolyticus,Vibrio cyclitrophicus,Vibrio sp F13,Vibrio tasmaniensis',\n",
" 'Vibrio cyclitrophicus,Vibrio sp F13,Vibrio tasmaniensis',\n",
" 'Vibrio lentus'], dtype=object),\n",
" 'Vibrio kanaloae': array(['Vibrio kanaloae'], dtype=object),\n",
" 'Vibrio lentus': array(['Vibrio lentus', 'Vibrio kanaloae', 'Vibrio splendidus',\n",
" 'Vibrio tasmaniensis', 'Vibrio breoganii', 'Vibrio cyclitrophicus',\n",
" 'Vibrio sp F12'], dtype=object),\n",
" 'Vibrio sp 12E03': array(['Vibrio sp F13'], dtype=object),\n",
" 'Vibrio sp F12': array(['Vibrio sp F12'], dtype=object),\n",
" 'Vibrio sp F13': array(['Vibrio sp F13', 'Vibrio sp 12E03', 'Vibrio sp 12E03,Vibrio sp F12',\n",
" 'Vibrio sp F12', 'Vibrio splendidus',\n",
" 'Vibrio sp 12E03,Vibrio sp F12,Vibrio sp F13',\n",
" 'Vibrio cyclitrophicus'], dtype=object),\n",
" 'Vibrio splendidus': array(['Vibrio splendidus', 'Vibrio cyclitrophicus', 'Vibrio lentus',\n",
" 'Vibrio tasmaniensis', 'Vibrio splendidus,Vibrio splendidus F11'], dtype=object),\n",
" 'Vibrio splendidus F11': array(['Vibrio splendidus F11'], dtype=object),\n",
" 'Vibrio tasmaniensis': array(['Vibrio tasmaniensis', 'Vibrio breoganii', 'Vibrio lentus',\n",
" 'Vibrio alginolyticus,Vibrio tasmaniensis',\n",
" 'Vibrio splendidus,Vibrio tasmaniensis', 'Vibrio splendidus',\n",
" 'Vibrio cyclitrophicus'], dtype=object)}"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}],
"source": [
"# Map each true population to the distinct called_pop values observed for it.\n",
"# Built here so this cell does not rely on a later cell having been run first.\n",
"d = df.groupby(\"true_pop\").called_pop.unique().to_dict()\n",
"d"
]
}, {
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/josephe/miniconda3/lib/python3.5/site-packages/cufflinks/plotlytools.py:123: FutureWarning:\n",
"\n",
"sort is deprecated, use sort_values(inplace=True) for INPLACE sorting\n",
"\n"
]
}, {
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~elsherbini/2740.embed\" height=\"587px\" width=\"1116px\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}],
"source": [
"# Mapping of each true population to the distinct populations it was called as.\n",
"d = df.groupby(\"true_pop\").called_pop.unique().to_dict()\n",
"\n",
"# Strain counts per (true_pop, called_pop) pair in one grouped pass, pivoted so\n",
"# rows are true populations and columns are called populations; pairs that\n",
"# never occur are NaN.\n",
"mine = df.groupby([\"true_pop\", \"called_pop\"])[\"strain\"].count().unstack()\n",
"mine.iplot(kind=\"bar\", barmode=\"stack\", sortbars=True)"
]
}],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment