Skip to content

Instantly share code, notes, and snippets.

@andylolz
Created May 15, 2019 11:21
Show Gist options
  • Save andylolz/64dcdb01e5b53b40beb36d158c4288e0 to your computer and use it in GitHub Desktop.
Save andylolz/64dcdb01e5b53b40beb36d158c4288e0 to your computer and use it in GitHub Desktop.
Some stats on v1.0x data, using iatikit. Stats generated 15 May 2019.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from datetime import date\n",
"import logging\n",
"\n",
"import iatikit\n",
"\n",
"\n",
"# Raise the root logger's threshold so only ERROR and above is emitted.\n",
"root_logger = logging.getLogger()\n",
"root_logger.setLevel(logging.ERROR)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Collect every dataset that passes validate_xml() and whose version string\n",
"# starts with '1'. The XML check runs first, so the version is only read\n",
"# from datasets that passed it.\n",
"datasets = [\n",
"    dataset\n",
"    for dataset in iatikit.data().datasets\n",
"    if dataset.validate_xml() and dataset.version.startswith('1')\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total v1.0x organisation datasets: 116\n"
]
}
],
"source": [
"# Tally the collected datasets whose filetype is 'organisation'.\n",
"total_org_datasets = sum(\n",
"    1 for dataset in datasets if dataset.filetype == 'organisation'\n",
")\n",
"print(f'Total v1.0x organisation datasets: {total_org_datasets:,}')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total v1.0x activity datasets: 594\n"
]
}
],
"source": [
"# Tally the collected datasets whose filetype is 'activity'.\n",
"total_act_datasets = sum(\n",
"    1 for dataset in datasets if dataset.filetype == 'activity'\n",
")\n",
"print(f'Total v1.0x activity datasets: {total_act_datasets:,}')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total v1.0x activities: 68,698\n"
]
}
],
"source": [
"# Add up the number of activities found in each collected dataset.\n",
"total_acts = sum(len(dataset.activities) for dataset in datasets)\n",
"print(f'Total v1.0x activities: {total_acts:,}')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total v1.0x activities with status 1 or 2: 24,125\n"
]
}
],
"source": [
"# XPath predicate matching an activity-status code of \"1\" or \"2\".\n",
"query = 'activity-status/@code=\"1\" or activity-status/@code=\"2\"'\n",
"\n",
"# Count the matching activities per dataset, then add the counts together.\n",
"live_counts = (\n",
"    dataset.activities.where(xpath=query).count() for dataset in datasets\n",
")\n",
"total_live_acts = sum(live_counts)\n",
"print(f'Total v1.0x activities with status 1 or 2: {total_live_acts:,}')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total v1.0x activities with end date in the future: 28,057\n"
]
}
],
"source": [
"# Count activities that have not already ended. An activity is skipped only\n",
"# when it has an end date earlier than today, so activities with no end date\n",
"# (a falsy `activity.end`) and those ending today are included in the total.\n",
"today = date.today()  # read once so every activity is compared with the same day\n",
"total_future_acts = sum(\n",
"    1\n",
"    for dataset in datasets\n",
"    for activity in dataset.activities\n",
"    if not (activity.end and activity.end < today)\n",
")\n",
"print('Total v1.0x activities with end date in the future: '\n",
"      f'{total_future_acts:,}')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment