Created
January 5, 2024 17:54
-
-
Save aflaxman/189f40069c4eba8f8e04999207ab140e to your computer and use it in GitHub Desktop.
Vivarium Public Health model for Dementia in Singapore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "e02ba2a8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import warnings\n", | |
"warnings.simplefilter(action='ignore', category=FutureWarning) # it would be great to update vivarium to make this unnecessary!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "11db1f55", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Thu Jan 4 21:02:12 PST 2024\r\n" | |
] | |
} | |
], | |
"source": [ | |
"import numpy as np, matplotlib.pyplot as plt, pandas as pd\n", | |
"pd.set_option('display.max_rows', 8)\n", | |
"!date" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "306fe3c0", | |
"metadata": {}, | |
"source": [ | |
"# Simulate billing data for DisMod-IPD\n", | |
"\n", | |
"Extending dementia sim I prototyped last week." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "6a536915", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Config: 'input_data:\n", | |
" intermediary_data_cache_path:\n", | |
" user_configs: /ihme/scratch/tmp/gbd_2017_cache\n", | |
" cache_data:\n", | |
" base: True'\n", | |
"Cache Dir: '/ihme/scratch/tmp/gbd_2017_cache'\n" | |
] | |
} | |
], | |
"source": [ | |
"# the Artifact class is defined in the base vivarium module\n", | |
"import vivarium as vi\n", | |
"\n", | |
"# for creating the population and disese model components of the sim we will use vivarium_public_health\n", | |
"import vivarium_public_health as vph\n", | |
"\n", | |
"# and to get the data for the artifact, we will use vivarium_inputs and gbd_mapping\n", | |
"# note that vivarium_inputs only works when on the IHME VPN at present\n", | |
"import vivarium_inputs as vii, gbd_mapping" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "be49200b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/vivarium/framework/artifact/artifact.py:69: UserWarning: No artifact found at /share/scratch/users/abie/t.hdf. Building new artifact.\n", | |
" warnings.warn(f\"No artifact found at {path}. Building new artifact.\")\n" | |
] | |
} | |
], | |
"source": [ | |
"# for this example, we will create a new artifact from scratch\n", | |
"fname = '/share/scratch/users/abie/t.hdf'\n", | |
"!rm $fname\n", | |
"art = vi.Artifact(fname)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "9b8a21f4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# It would be cool to extend vivarium_inputs methods to accept a list of locations.\n", | |
"# Currently, it is necessary to create a separate artifact for each location of interest.\n", | |
"\n", | |
"location = 'Singapore'\n", | |
"art.write('population.location', location)\n", | |
"\n", | |
"df = vii.get_population_structure(location)\n", | |
"art.write('population.structure', df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "b3df805b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Calculating DALYs uses the theoretical minimu risk life expectancy (TMREL)\n", | |
"# and the mortality component in vivarium_public_health expect this to be \n", | |
"# in the artifact as well\n", | |
"\n", | |
"df = vii.get_theoretical_minimum_risk_life_expectancy()\n", | |
"art.write('population.theoretical_minimum_risk_life_expectancy', df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "84f0f141", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"\u001b[32m2024-01-04 21:03:54.402\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mvivarium_inputs.validation.raw\u001b[0m:\u001b[36mcheck_columns\u001b[0m:\u001b[36m1994\u001b[0m - \u001b[33m\u001b[1mData returned extra columns: {'version_id'}.\u001b[0m\n", | |
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/vivarium_inputs/utilities.py:477: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", | |
" data[f\"{split_column_prefix}_end\"] = [\n" | |
] | |
} | |
], | |
"source": [ | |
"# All-cause mortality is an important model parameter\n", | |
"# and vivarium_public_health includes components to\n", | |
"# work with it, which use the all-casuse mortality rate\n", | |
"# from the artifact with the key cause.all_causes.cause_specific_mortality_rate\n", | |
"\n", | |
"df = vii.get_measure(gbd_mapping.causes.all_causes, 'cause_specific_mortality_rate', location)\n", | |
"art.write('cause.all_causes.cause_specific_mortality_rate', df.loc[location]) # TODO: it is annoying that the location needs to be stripped from the dataframe" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "166336ff", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# vivarium_public_health also includes disease models, such as\n", | |
"# vph.SI and vph.SIS that work for conditions modeled in the GBD.\n", | |
"# There is a list of these in the gbd_mapping.causes object\n", | |
"#\n", | |
"# vph.SI uses five epidemiological parameters, named to match the\n", | |
"# name used to create the vph.SI component in the model specification.\n", | |
"#\n", | |
"# For example, we will use vph.disease.SI('dementia') in our model\n", | |
"# below, which will require that the artifact contain data for the\n", | |
"# key cause.dementia.incidence_rate and four other parameters\n", | |
"\n", | |
"cause = gbd_mapping.causes.alzheimers_disease_and_other_dementias\n", | |
"for measure in ['incidence_rate',\n", | |
" 'cause_specific_mortality_rate',\n", | |
" 'prevalence',\n", | |
" 'disability_weight',\n", | |
" 'excess_mortality_rate',\n", | |
" ]:\n", | |
" df = vii.get_measure(cause, measure, location)\n", | |
" key = f'cause.dementia.{measure}'\n", | |
" art.write(key, df.loc[location]) # TODO: it is annoying that the location needs to be stripped from the dataframe" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "0740cf1b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# the vph.SI cause model also needs data on \"restrictions\" about the disease\n", | |
"# (e.g. Cervical Cancer occurs only in Females) which must be stored in the\n", | |
"# artifact as well.\n", | |
"\n", | |
"key = 'cause.dementia.restrictions'\n", | |
"restriction_dict = cause.restrictions.to_dict()\n", | |
"art.write(key, restriction_dict)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "66b1738b", | |
"metadata": {}, | |
"source": [ | |
"# Add inpatient envelope to artifact\n", | |
"\n", | |
"Adapted from `vivarium_nih_us_cvd`; would be nice to fix `vivarium_inputs` so that this was easier." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"id": "3f844f5b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/get_draws/api.py:313: DeprecationWarning: In GBD 2022, the arguments gbd_round_id and decomp_step will be removed. Switch to using release_id exclusively. Every GBD round and decomp step combination has an equivalent release_id: https://hub.ihme.washington.edu/x/vS7NCQ\n", | |
" release_helpers.validate_decomp_step_and_release_id(\n", | |
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/get_draws/base/utils.py:30: FutureWarning: In a future version of pandas, a length 1 tuple will be returned when iterating over a groupby with a grouper equal to a list of length 1. Don't supply a list with a single grouper to avoid this warning.\n", | |
" for name, group in df.groupby([group_cols]):\n", | |
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/get_draws/base/utils.py:30: FutureWarning: In a future version of pandas, a length 1 tuple will be returned when iterating over a groupby with a grouper equal to a list of length 1. Don't supply a list with a single grouper to avoid this warning.\n", | |
" for name, group in df.groupby([group_cols]):\n", | |
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/vivarium_inputs/utilities.py:477: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", | |
" data[f\"{split_column_prefix}_end\"] = [\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"\n", | |
"key = 'healthcare_entity.inpatient_visits.inpatient_envelope'\n", | |
"\n", | |
"location_id = vii.utility_data.get_location_id(location)\n", | |
"entity = vii.mapping_extension.healthcare_entities.inpatient_visits\n", | |
"\n", | |
"\n", | |
"# df = vii.core.get_utilization_rate(entity, location_id)\n", | |
"from vivarium_gbd_access.utilities import get_draws\n", | |
"from vivarium_gbd_access.constants import ROUND_IDS, SEX, SOURCES\n", | |
"from vivarium_gbd_access import gbd\n", | |
"from vivarium_inputs import utilities as vi_utils\n", | |
"from vivarium_inputs import globals as vi_globals\n", | |
"\n", | |
"# vivarium_inputs.core.get_utilization_rate() breaks with the hard-coded\n", | |
"# gbd_round_id=6; use gbd_round_id=5.\n", | |
"# TODO: SDB fix in vivarium_gbd_access.gbd.get_modelable_entity_draws()?\n", | |
"data = get_draws(\n", | |
" gbd_id_type=\"modelable_entity_id\",\n", | |
" gbd_id=entity.gbd_id,\n", | |
" source=SOURCES.EPI,\n", | |
" location_id=location_id,\n", | |
" sex_id=SEX.MALE + SEX.FEMALE,\n", | |
" age_group_id=gbd.get_age_group_id(),\n", | |
" gbd_round_id=ROUND_IDS.GBD_2017,\n", | |
" status=\"best\",\n", | |
")\n", | |
"# Fill in year gaps manually. vi_utils.normalize does not quite work because\n", | |
"# the data is missing required age_bin edges 2015 and 2019. Instead, let's\n", | |
"# assume 2018 and 2019 is the same as 2017 and interpolate everything else\n", | |
"tmp = data[data[\"year_id\"] == 2017]\n", | |
"for year in [2018, 2019]:\n", | |
" tmp[\"year_id\"] = year\n", | |
" data = pd.concat([data, tmp], axis=0)\n", | |
"data = vi_utils.interpolate_year(data)\n", | |
"\n", | |
"# Cleanup\n", | |
"data = vi_utils.normalize(data, fill_value=0)\n", | |
"data = data.filter(vi_globals.DEMOGRAPHIC_COLUMNS + vi_globals.DRAW_COLUMNS)\n", | |
"data = vi_utils.reshape(data)\n", | |
"data = vi_utils.scrub_gbd_conventions(data, location)\n", | |
"data = vi_utils.split_interval(data, interval_column=\"age\", split_column_prefix=\"age\")\n", | |
"data = vi_utils.split_interval(data, interval_column=\"year\", split_column_prefix=\"year\")\n", | |
"data = vi_utils.sort_hierarchical_data(data).droplevel(\"location\")\n", | |
"\n", | |
"art.write(key, data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "25855ba2", | |
"metadata": {}, | |
"source": [ | |
"# With this minimal artifact in hand, run a sim for dementia\n", | |
"\n", | |
"And include a custom component that has simulants generate data on visits to the hospital" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"id": "432cc67e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from vivarium import Component\n", | |
"\n", | |
"class HealthcareUtilization(Component):\n", | |
" \"\"\"Manages healthcare utilization\"\"\"\n", | |
"\n", | |
" ##############\n", | |
" # Properties #\n", | |
" ##############\n", | |
"\n", | |
" @property\n", | |
" def columns_required(self):\n", | |
" return [\n", | |
" \"age\",\n", | |
" \"sex\",\n", | |
" \"dementia\", # TODO: refactor this to be more general (possibly using a pipeline?)\n", | |
" ]\n", | |
"\n", | |
" @property\n", | |
" def initialization_requirements(self):\n", | |
" return {\n", | |
" \"requires_columns\": self.columns_required,\n", | |
" \"requires_values\": [\n", | |
" ],\n", | |
" \"requires_streams\": [self.name],\n", | |
" }\n", | |
"\n", | |
" #####################\n", | |
" # Lifecycle methods #\n", | |
" #####################\n", | |
" \n", | |
" def setup(self, builder) -> None:\n", | |
" self.clock = builder.time.clock()\n", | |
" self.step_size = builder.time.step_size()\n", | |
" self.randomness = builder.randomness.get_stream(self.name)\n", | |
"\n", | |
" # Load data\n", | |
" utilization_data = builder.data.load('healthcare_entity.inpatient_visits.inpatient_envelope')\n", | |
" background_utilization_rate = builder.lookup.build_table(\n", | |
" utilization_data, parameter_columns=[\"age\", \"year\"], key_columns=[\"sex\"]\n", | |
" )\n", | |
" self.background_utilization_rate = builder.value.register_rate_producer(\n", | |
" \"utilization_rate\", background_utilization_rate, requires_columns=[\"age\", \"sex\"]\n", | |
" )\n", | |
" \n", | |
" # HACK: store visit dates for background and dementia visits\n", | |
" self.background_visits = []\n", | |
" self.dementia_visits = []\n", | |
"\n", | |
" ########################\n", | |
" # Event-driven methods #\n", | |
" ########################\n", | |
"\n", | |
" def on_time_step(self, event) -> None:\n", | |
" \"\"\"Determine if someone will go for background visit or dementia visit.\n", | |
" \"\"\"\n", | |
" event_time = event.time\n", | |
" pop = self.population_view.get(event.index, query='alive == \"alive\"')\n", | |
"\n", | |
" # Background visits\n", | |
" utilization_rate = self.background_utilization_rate(pop.index)\n", | |
" visit_background = self.randomness.filter_for_rate(\n", | |
" pop.index, utilization_rate, additional_key=\"background_visits\"\n", | |
" ) # pd.Index\n", | |
"\n", | |
" df_visits = pop.loc[visit_background].copy()\n", | |
" df_visits['date'] = event_time\n", | |
" if len(df_visits) > 0:\n", | |
" self.background_visits.append(df_visits)\n", | |
"\n", | |
" # dementia visits\n", | |
" rows = pop[pop.dementia == 'dementia'].index\n", | |
" if len(rows) > 0:\n", | |
" utilization_rate = self.background_utilization_rate(rows)\n", | |
" utilization_rate *= 10 # TODO: use a pipeline, use real data\n", | |
"\n", | |
" visit_dementia = self.randomness.filter_for_rate(\n", | |
" rows, utilization_rate, additional_key=\"dementia_visits\"\n", | |
" ) # pd.Index\n", | |
"\n", | |
" df_visits = pop.loc[visit_dementia].copy()\n", | |
" df_visits['date'] = event_time\n", | |
" if len(df_visits) > 0:\n", | |
" self.dementia_visits.append(df_visits)\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"id": "e2d9d075", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\u001b[32m2024-01-04 21:59:18.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36martifact_manager\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1mRunning simulation from artifact located at /share/scratch/users/abie/t.hdf.\u001b[0m\n", | |
"\u001b[32m2024-01-04 21:59:18.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36martifact_manager\u001b[0m:\u001b[36m68\u001b[0m - \u001b[1mArtifact base filter terms are ['draw == 0'].\u001b[0m\n", | |
"\u001b[32m2024-01-04 21:59:18.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36martifact_manager\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mArtifact additional filter terms are None.\u001b[0m\n", | |
"\u001b[32m2024-01-04 21:59:21.609\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36mvalues_manager\u001b[0m:\u001b[36m279\u001b[0m - \u001b[33m\u001b[1mUnsourced pipelines: ['disability_weight']\u001b[0m\n", | |
"\u001b[32m2024-01-04 21:59:21.613\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36mresource_manager\u001b[0m:\u001b[36m256\u001b[0m - \u001b[33m\u001b[1mResource missing_value_source.disability_weight is not provided by any component but is needed to compute (value.disability_weight).\u001b[0m\n", | |
"CPU times: user 3.34 s, sys: 94.6 ms, total: 3.43 s\n", | |
"Wall time: 4.17 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"\n", | |
"from vivarium_public_health import population, disease\n", | |
"\n", | |
"# create healthcare utilization component first, for easy access later\n", | |
"healthcare_utilization = HealthcareUtilization()\n", | |
"\n", | |
"# create a vivarium simulation with this SIR component, and run it\n", | |
"sim = vi.InteractiveContext(\n", | |
" components=[\n", | |
" vph.population.BasePopulation(),\n", | |
" vph.population.Mortality(),\n", | |
" vph.disease.SI('dementia'),\n", | |
" healthcare_utilization,\n", | |
" ],\n", | |
" configuration={'input_data': {'artifact_path': fname,\n", | |
" 'input_draw_number': 0, # FIXME: very cryptic error when this is not included in the config dict\n", | |
" },\n", | |
" 'time': {'step_size': 28, # Days\n", | |
" 'start': {\n", | |
" 'year': 1990,\n", | |
" 'month': 1,\n", | |
" 'day': 1,},\n", | |
" 'end': {\n", | |
" 'year': 2020,\n", | |
" 'month': 1,\n", | |
" 'day': 31,\n", | |
" },\n", | |
" },\n", | |
" 'population': {'population_size': 50_000,\n", | |
" 'age_start': 0,\n", | |
" 'age_end': 100,\n", | |
" 'exit_age': 100,\n", | |
" }\n", | |
" },\n", | |
")\n", | |
"df0 = sim.get_population()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"id": "8b5ab4f6", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"susceptible_to_dementia 49559\n", | |
"dementia 441\n", | |
"Name: dementia, dtype: int64" | |
] | |
}, | |
"execution_count": 50, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df0.dementia.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"id": "eae56d3e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "bfa8280b4be040f78dfba0c892f5d10d", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"VBox(children=(HTML(value=''), IntProgress(value=0, max=393)))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 3min 49s, sys: 242 ms, total: 3min 50s\n", | |
"Wall time: 3min 49s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>tracked</th>\n", | |
" <th>sex</th>\n", | |
" <th>exit_time</th>\n", | |
" <th>location</th>\n", | |
" <th>age</th>\n", | |
" <th>alive</th>\n", | |
" <th>entrance_time</th>\n", | |
" <th>cause_of_death</th>\n", | |
" <th>years_of_life_lost</th>\n", | |
" <th>dementia</th>\n", | |
" <th>susceptible_to_dementia_event_count</th>\n", | |
" <th>susceptible_to_dementia_event_time</th>\n", | |
" <th>dementia_event_time</th>\n", | |
" <th>dementia_event_count</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>True</td>\n", | |
" <td>Male</td>\n", | |
" <td>NaT</td>\n", | |
" <td>Singapore</td>\n", | |
" <td>48.370896</td>\n", | |
" <td>alive</td>\n", | |
" <td>1989-12-04</td>\n", | |
" <td>not_dead</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>susceptible_to_dementia</td>\n", | |
" <td>0</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>True</td>\n", | |
" <td>Female</td>\n", | |
" <td>NaT</td>\n", | |
" <td>Singapore</td>\n", | |
" <td>66.642554</td>\n", | |
" <td>alive</td>\n", | |
" <td>1989-12-04</td>\n", | |
" <td>not_dead</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>susceptible_to_dementia</td>\n", | |
" <td>0</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>True</td>\n", | |
" <td>Male</td>\n", | |
" <td>NaT</td>\n", | |
" <td>Singapore</td>\n", | |
" <td>61.370077</td>\n", | |
" <td>alive</td>\n", | |
" <td>1989-12-04</td>\n", | |
" <td>not_dead</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>susceptible_to_dementia</td>\n", | |
" <td>0</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>True</td>\n", | |
" <td>Male</td>\n", | |
" <td>NaT</td>\n", | |
" <td>Singapore</td>\n", | |
" <td>81.751407</td>\n", | |
" <td>alive</td>\n", | |
" <td>1989-12-04</td>\n", | |
" <td>not_dead</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>susceptible_to_dementia</td>\n", | |
" <td>0</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49996</th>\n", | |
" <td>True</td>\n", | |
" <td>Male</td>\n", | |
" <td>2014-10-06</td>\n", | |
" <td>Singapore</td>\n", | |
" <td>86.447725</td>\n", | |
" <td>dead</td>\n", | |
" <td>1989-12-04</td>\n", | |
" <td>other_causes</td>\n", | |
" <td>9.306914</td>\n", | |
" <td>susceptible_to_dementia</td>\n", | |
" <td>0</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49997</th>\n", | |
" <td>True</td>\n", | |
" <td>Male</td>\n", | |
" <td>NaT</td>\n", | |
" <td>Singapore</td>\n", | |
" <td>75.492170</td>\n", | |
" <td>alive</td>\n", | |
" <td>1989-12-04</td>\n", | |
" <td>not_dead</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>susceptible_to_dementia</td>\n", | |
" <td>0</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49998</th>\n", | |
" <td>True</td>\n", | |
" <td>Female</td>\n", | |
" <td>NaT</td>\n", | |
" <td>Singapore</td>\n", | |
" <td>51.606382</td>\n", | |
" <td>alive</td>\n", | |
" <td>1989-12-04</td>\n", | |
" <td>not_dead</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>susceptible_to_dementia</td>\n", | |
" <td>0</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>49999</th>\n", | |
" <td>True</td>\n", | |
" <td>Female</td>\n", | |
" <td>NaT</td>\n", | |
" <td>Singapore</td>\n", | |
" <td>67.977171</td>\n", | |
" <td>alive</td>\n", | |
" <td>1989-12-04</td>\n", | |
" <td>not_dead</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>susceptible_to_dementia</td>\n", | |
" <td>0</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>49808 rows × 14 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" tracked sex exit_time location age alive entrance_time \\\n", | |
"0 True Male NaT Singapore 48.370896 alive 1989-12-04 \n", | |
"1 True Female NaT Singapore 66.642554 alive 1989-12-04 \n", | |
"2 True Male NaT Singapore 61.370077 alive 1989-12-04 \n", | |
"3 True Male NaT Singapore 81.751407 alive 1989-12-04 \n", | |
"... ... ... ... ... ... ... ... \n", | |
"49996 True Male 2014-10-06 Singapore 86.447725 dead 1989-12-04 \n", | |
"49997 True Male NaT Singapore 75.492170 alive 1989-12-04 \n", | |
"49998 True Female NaT Singapore 51.606382 alive 1989-12-04 \n", | |
"49999 True Female NaT Singapore 67.977171 alive 1989-12-04 \n", | |
"\n", | |
" cause_of_death years_of_life_lost dementia \\\n", | |
"0 not_dead 0.000000 susceptible_to_dementia \n", | |
"1 not_dead 0.000000 susceptible_to_dementia \n", | |
"2 not_dead 0.000000 susceptible_to_dementia \n", | |
"3 not_dead 0.000000 susceptible_to_dementia \n", | |
"... ... ... ... \n", | |
"49996 other_causes 9.306914 susceptible_to_dementia \n", | |
"49997 not_dead 0.000000 susceptible_to_dementia \n", | |
"49998 not_dead 0.000000 susceptible_to_dementia \n", | |
"49999 not_dead 0.000000 susceptible_to_dementia \n", | |
"\n", | |
" susceptible_to_dementia_event_count susceptible_to_dementia_event_time \\\n", | |
"0 0 NaT \n", | |
"1 0 NaT \n", | |
"2 0 NaT \n", | |
"3 0 NaT \n", | |
"... ... ... \n", | |
"49996 0 NaT \n", | |
"49997 0 NaT \n", | |
"49998 0 NaT \n", | |
"49999 0 NaT \n", | |
"\n", | |
" dementia_event_time dementia_event_count \n", | |
"0 NaT 0 \n", | |
"1 NaT 0 \n", | |
"2 NaT 0 \n", | |
"3 NaT 0 \n", | |
"... ... ... \n", | |
"49996 NaT 0 \n", | |
"49997 NaT 0 \n", | |
"49998 NaT 0 \n", | |
"49999 NaT 0 \n", | |
"\n", | |
"[49808 rows x 14 columns]" | |
] | |
}, | |
"execution_count": 51, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"\n", | |
"sim.run() # FIXME: progress bar is not showing up\n", | |
"\n", | |
"df1 = sim.get_population()\n", | |
"df1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"id": "0e367af3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"susceptible_to_dementia 33855\n", | |
"dementia 1481\n", | |
"Name: dementia, dtype: int64" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df1[df1.alive == 'alive'].dementia.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"id": "064a6a50", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"not_dead 35336\n", | |
"other_causes 13722\n", | |
"dementia 750\n", | |
"Name: cause_of_death, dtype: int64" | |
] | |
}, | |
"execution_count": 53, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df1.cause_of_death.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"id": "31c725c4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'base_population': BasePopulation(),\n", | |
" 'age_out_simulants': AgeOutSimulants(),\n", | |
" 'mortality': Mortality(),\n", | |
" 'disease_model.dementia': DiseaseModel(state_column=dementia),\n", | |
" 'susceptible_state.susceptible_to_dementia.None.cause': SusceptibleState(state_id=susceptible_to_dementia, side_effect_function=None, cause_type=cause),\n", | |
" 'transition_set.susceptible_to_dementia': TransitionSet(state_id=susceptible_to_dementia, transitions=[RateTransition(input_state=SusceptibleState(state_id=susceptible_to_dementia, side_effect_function=None, cause_type=cause), output_state=DiseaseState(state_id=dementia, side_effect_function=None, cause_type=cause))]),\n", | |
" \"rate_transition.'susceptible_state.susceptible_to_dementia.None.cause'.'disease_state.dementia.None.cause'\": RateTransition(input_state=SusceptibleState(state_id=susceptible_to_dementia, side_effect_function=None, cause_type=cause), output_state=DiseaseState(state_id=dementia, side_effect_function=None, cause_type=cause)),\n", | |
" 'disease_state.dementia.None.cause': DiseaseState(state_id=dementia, side_effect_function=None, cause_type=cause),\n", | |
" 'transition_set.dementia': TransitionSet(state_id=dementia, transitions=[]),\n", | |
" 'healthcare_utilization': HealthcareUtilization(),\n", | |
" 'metrics': Metrics()}" | |
] | |
}, | |
"execution_count": 54, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# sim.list_components()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"id": "dc7b54df", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# healthcare_utilization = sim.get_component('healthcare_utilization')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"id": "8342bae7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2 11423\n", | |
"1 11307\n", | |
"3 8556\n", | |
"4 5260\n", | |
" ... \n", | |
"13 4\n", | |
"14 2\n", | |
"16 1\n", | |
"15 1\n", | |
"Name: s_id, Length: 16, dtype: int64" | |
] | |
}, | |
"execution_count": 61, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.concat(healthcare_utilization.background_visits)\n", | |
"df.index.name = 's_id'\n", | |
"df = df.reset_index()\n", | |
"df.s_id.value_counts().value_counts() # frequency of background visits per patient" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"id": "b3c850e5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1 314\n", | |
"2 271\n", | |
"4 255\n", | |
"3 254\n", | |
" ... \n", | |
"47 1\n", | |
"52 1\n", | |
"54 1\n", | |
"59 1\n", | |
"Name: s_id, Length: 54, dtype: int64" | |
] | |
}, | |
"execution_count": 62, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.concat(healthcare_utilization.dementia_visits)\n", | |
"df.index.name = 's_id'\n", | |
"df = df.reset_index()\n", | |
"df.s_id.value_counts().value_counts() # frequency of dementia visits per patient" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"id": "e9b70fb4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>s_id</th>\n", | |
" <th>age</th>\n", | |
" <th>sex</th>\n", | |
" <th>dementia</th>\n", | |
" <th>date</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>298</td>\n", | |
" <td>81.311118</td>\n", | |
" <td>Female</td>\n", | |
" <td>dementia</td>\n", | |
" <td>1990-01-29</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1699</td>\n", | |
" <td>78.268114</td>\n", | |
" <td>Female</td>\n", | |
" <td>dementia</td>\n", | |
" <td>1990-01-29</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1739</td>\n", | |
" <td>71.451667</td>\n", | |
" <td>Female</td>\n", | |
" <td>dementia</td>\n", | |
" <td>1990-01-29</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1977</td>\n", | |
" <td>89.341515</td>\n", | |
" <td>Female</td>\n", | |
" <td>dementia</td>\n", | |
" <td>1990-01-29</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>37203</th>\n", | |
" <td>48966</td>\n", | |
" <td>72.428198</td>\n", | |
" <td>Male</td>\n", | |
" <td>dementia</td>\n", | |
" <td>2020-02-17</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>37204</th>\n", | |
" <td>48972</td>\n", | |
" <td>87.912832</td>\n", | |
" <td>Female</td>\n", | |
" <td>dementia</td>\n", | |
" <td>2020-02-17</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>37205</th>\n", | |
" <td>49163</td>\n", | |
" <td>78.288977</td>\n", | |
" <td>Female</td>\n", | |
" <td>dementia</td>\n", | |
" <td>2020-02-17</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>37206</th>\n", | |
" <td>49939</td>\n", | |
" <td>91.829400</td>\n", | |
" <td>Male</td>\n", | |
" <td>dementia</td>\n", | |
" <td>2020-02-17</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>37207 rows × 5 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" s_id age sex dementia date\n", | |
"0 298 81.311118 Female dementia 1990-01-29\n", | |
"1 1699 78.268114 Female dementia 1990-01-29\n", | |
"2 1739 71.451667 Female dementia 1990-01-29\n", | |
"3 1977 89.341515 Female dementia 1990-01-29\n", | |
"... ... ... ... ... ...\n", | |
"37203 48966 72.428198 Male dementia 2020-02-17\n", | |
"37204 48972 87.912832 Female dementia 2020-02-17\n", | |
"37205 49163 78.288977 Female dementia 2020-02-17\n", | |
"37206 49939 91.829400 Male dementia 2020-02-17\n", | |
"\n", | |
"[37207 rows x 5 columns]" | |
] | |
}, | |
"execution_count": 63, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f2fb020d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# check that only living simulants go to the hospital" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "vivarium_nih_us_cvd", | |
"language": "python", | |
"name": "vivarium_nih_us_cvd" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment