Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save aflaxman/189f40069c4eba8f8e04999207ab140e to your computer and use it in GitHub Desktop.
Save aflaxman/189f40069c4eba8f8e04999207ab140e to your computer and use it in GitHub Desktop.
Vivarium Public Health model for Dementia in Singapore
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e02ba2a8",
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.simplefilter(action='ignore', category=FutureWarning) # it would be great to update vivarium to make this unnecessary!"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "11db1f55",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thu Jan 4 21:02:12 PST 2024\r\n"
]
}
],
"source": [
"import numpy as np, matplotlib.pyplot as plt, pandas as pd\n",
"pd.set_option('display.max_rows', 8)\n",
"!date"
]
},
{
"cell_type": "markdown",
"id": "306fe3c0",
"metadata": {},
"source": [
"# Simulate billing data for DisMod-IPD\n",
"\n",
"Extending dementia sim I prototyped last week."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6a536915",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Config: 'input_data:\n",
" intermediary_data_cache_path:\n",
" user_configs: /ihme/scratch/tmp/gbd_2017_cache\n",
" cache_data:\n",
" base: True'\n",
"Cache Dir: '/ihme/scratch/tmp/gbd_2017_cache'\n"
]
}
],
"source": [
"# the Artifact class is defined in the base vivarium module\n",
"import vivarium as vi\n",
"\n",
"# for creating the population and disease model components of the sim we will use vivarium_public_health\n",
"import vivarium_public_health as vph\n",
"\n",
"# and to get the data for the artifact, we will use vivarium_inputs and gbd_mapping\n",
"# note that vivarium_inputs only works when on the IHME VPN at present\n",
"import vivarium_inputs as vii, gbd_mapping"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "be49200b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/vivarium/framework/artifact/artifact.py:69: UserWarning: No artifact found at /share/scratch/users/abie/t.hdf. Building new artifact.\n",
" warnings.warn(f\"No artifact found at {path}. Building new artifact.\")\n"
]
}
],
"source": [
"# for this example, we will create a new artifact from scratch\n",
"fname = '/share/scratch/users/abie/t.hdf'\n",
"\n",
"# -f: stay quiet when the file does not exist yet (e.g. first run on a fresh\n",
"# scratch directory), so this cell is safe under Restart & Run All\n",
"!rm -f $fname\n",
"art = vi.Artifact(fname)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9b8a21f4",
"metadata": {},
"outputs": [],
"source": [
"# It would be cool to extend vivarium_inputs methods to accept a list of locations.\n",
"# Currently, it is necessary to create a separate artifact for each location of interest.\n",
"\n",
"location = 'Singapore'\n",
"art.write('population.location', location)\n",
"\n",
"df = vii.get_population_structure(location)\n",
"art.write('population.structure', df)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b3df805b",
"metadata": {},
"outputs": [],
"source": [
"# Calculating DALYs uses the theoretical minimum risk life expectancy (TMREL)\n",
"# and the mortality component in vivarium_public_health expects this to be\n",
"# in the artifact as well\n",
"\n",
"df = vii.get_theoretical_minimum_risk_life_expectancy()\n",
"art.write('population.theoretical_minimum_risk_life_expectancy', df)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "84f0f141",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-01-04 21:03:54.402\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mvivarium_inputs.validation.raw\u001b[0m:\u001b[36mcheck_columns\u001b[0m:\u001b[36m1994\u001b[0m - \u001b[33m\u001b[1mData returned extra columns: {'version_id'}.\u001b[0m\n",
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/vivarium_inputs/utilities.py:477: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
" data[f\"{split_column_prefix}_end\"] = [\n"
]
}
],
"source": [
"# All-cause mortality is an important model parameter\n",
"# and vivarium_public_health includes components to\n",
"# work with it, which use the all-cause mortality rate\n",
"# from the artifact with the key cause.all_causes.cause_specific_mortality_rate\n",
"\n",
"df = vii.get_measure(gbd_mapping.causes.all_causes, 'cause_specific_mortality_rate', location)\n",
"art.write('cause.all_causes.cause_specific_mortality_rate', df.loc[location]) # TODO: it is annoying that the location needs to be stripped from the dataframe"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "166336ff",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# vivarium_public_health also includes disease models, such as vph.SI and\n",
"# vph.SIS, that work for conditions modeled in the GBD; the available\n",
"# conditions are listed in the gbd_mapping.causes object.\n",
"#\n",
"# vph.SI reads five epidemiological parameters from the artifact. Their keys\n",
"# are built from the name given to the vph.SI component in the model\n",
"# specification: we use vph.disease.SI('dementia') below, so the artifact\n",
"# must contain cause.dementia.incidence_rate and the four other keys\n",
"# written here.\n",
"\n",
"cause = gbd_mapping.causes.alzheimers_disease_and_other_dementias\n",
"dementia_measures = (\n",
"    'incidence_rate',\n",
"    'cause_specific_mortality_rate',\n",
"    'prevalence',\n",
"    'disability_weight',\n",
"    'excess_mortality_rate',\n",
")\n",
"for measure in dementia_measures:\n",
"    key = f'cause.dementia.{measure}'\n",
"    df = vii.get_measure(cause, measure, location)\n",
"    # TODO: it is annoying that the location needs to be stripped from the dataframe\n",
"    art.write(key, df.loc[location])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0740cf1b",
"metadata": {},
"outputs": [],
"source": [
"# the vph.SI cause model also needs data on \"restrictions\" about the disease\n",
"# (e.g. Cervical Cancer occurs only in Females) which must be stored in the\n",
"# artifact as well.\n",
"\n",
"key = 'cause.dementia.restrictions'\n",
"restriction_dict = cause.restrictions.to_dict()\n",
"art.write(key, restriction_dict)"
]
},
{
"cell_type": "markdown",
"id": "66b1738b",
"metadata": {},
"source": [
"# Add inpatient envelope to artifact\n",
"\n",
"Adapted from `vivarium_nih_us_cvd`; would be nice to fix `vivarium_inputs` so that this was easier."
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "3f844f5b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/get_draws/api.py:313: DeprecationWarning: In GBD 2022, the arguments gbd_round_id and decomp_step will be removed. Switch to using release_id exclusively. Every GBD round and decomp step combination has an equivalent release_id: https://hub.ihme.washington.edu/x/vS7NCQ\n",
" release_helpers.validate_decomp_step_and_release_id(\n",
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/get_draws/base/utils.py:30: FutureWarning: In a future version of pandas, a length 1 tuple will be returned when iterating over a groupby with a grouper equal to a list of length 1. Don't supply a list with a single grouper to avoid this warning.\n",
" for name, group in df.groupby([group_cols]):\n",
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/get_draws/base/utils.py:30: FutureWarning: In a future version of pandas, a length 1 tuple will be returned when iterating over a groupby with a grouper equal to a list of length 1. Don't supply a list with a single grouper to avoid this warning.\n",
" for name, group in df.groupby([group_cols]):\n",
"/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/vivarium_inputs/utilities.py:477: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
" data[f\"{split_column_prefix}_end\"] = [\n"
]
}
],
"source": [
"%%time\n",
"\n",
"key = 'healthcare_entity.inpatient_visits.inpatient_envelope'\n",
"\n",
"location_id = vii.utility_data.get_location_id(location)\n",
"entity = vii.mapping_extension.healthcare_entities.inpatient_visits\n",
"\n",
"\n",
"# The one-line version would be\n",
"#     df = vii.core.get_utilization_rate(entity, location_id)\n",
"# but see the note on gbd_round_id below, so the draws are pulled by hand.\n",
"from vivarium_gbd_access.utilities import get_draws\n",
"from vivarium_gbd_access.constants import ROUND_IDS, SEX, SOURCES\n",
"from vivarium_gbd_access import gbd\n",
"from vivarium_inputs import utilities as vi_utils\n",
"from vivarium_inputs import globals as vi_globals\n",
"\n",
"# vivarium_inputs.core.get_utilization_rate() breaks with the hard-coded\n",
"# gbd_round_id=6; use gbd_round_id=5.\n",
"# TODO: SDB fix in vivarium_gbd_access.gbd.get_modelable_entity_draws()?\n",
"data = get_draws(\n",
"    gbd_id_type=\"modelable_entity_id\",\n",
"    gbd_id=entity.gbd_id,\n",
"    source=SOURCES.EPI,\n",
"    location_id=location_id,\n",
"    sex_id=SEX.MALE + SEX.FEMALE,\n",
"    age_group_id=gbd.get_age_group_id(),\n",
"    gbd_round_id=ROUND_IDS.GBD_2017,\n",
"    status=\"best\",\n",
")\n",
"# Fill in year gaps manually. vi_utils.normalize does not quite work because\n",
"# the data is missing required age_bin edges 2015 and 2019. Instead, let's\n",
"# assume 2018 and 2019 is the same as 2017 and interpolate everything else\n",
"#\n",
"# .assign() returns a relabelled copy of the 2017 rows for each extra year,\n",
"# so nothing is written into a slice of `data` (no SettingWithCopyWarning)\n",
"# and all frames are concatenated once, in the order data, 2018, 2019.\n",
"data_2017 = data[data[\"year_id\"] == 2017]\n",
"data = pd.concat(\n",
"    [data] + [data_2017.assign(year_id=year) for year in [2018, 2019]],\n",
"    axis=0,\n",
")\n",
"data = vi_utils.interpolate_year(data)\n",
"\n",
"# Cleanup\n",
"data = vi_utils.normalize(data, fill_value=0)\n",
"data = data.filter(vi_globals.DEMOGRAPHIC_COLUMNS + vi_globals.DRAW_COLUMNS)\n",
"data = vi_utils.reshape(data)\n",
"data = vi_utils.scrub_gbd_conventions(data, location)\n",
"data = vi_utils.split_interval(data, interval_column=\"age\", split_column_prefix=\"age\")\n",
"data = vi_utils.split_interval(data, interval_column=\"year\", split_column_prefix=\"year\")\n",
"data = vi_utils.sort_hierarchical_data(data).droplevel(\"location\")\n",
"\n",
"art.write(key, data)"
]
},
{
"cell_type": "markdown",
"id": "25855ba2",
"metadata": {},
"source": [
"# With this minimal artifact in hand, run a sim for dementia\n",
"\n",
"And include a custom component that has simulants generate data on visits to the hospital"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "432cc67e",
"metadata": {},
"outputs": [],
"source": [
"from vivarium import Component\n",
"\n",
"class HealthcareUtilization(Component):\n",
"    \"\"\"Sample inpatient visits for living simulants and keep a log of them.\n",
"\n",
"    On every time step two sets of visits are sampled independently:\n",
"\n",
"    * background visits, for all living simulants, at the rate given by the\n",
"      ``utilization_rate`` pipeline (built from the artifact key\n",
"      ``healthcare_entity.inpatient_visits.inpatient_envelope``);\n",
"    * dementia visits, for living simulants whose ``dementia`` column equals\n",
"      ``'dementia'``, at ``DEMENTIA_UTILIZATION_MULTIPLIER`` times that rate.\n",
"\n",
"    The sampled rows of the population view, plus a ``date`` column holding\n",
"    the event time, are appended as one DataFrame per time step to\n",
"    ``background_visits`` or ``dementia_visits``.\n",
"    \"\"\"\n",
"\n",
"    # Factor applied to the background rate for simulants with dementia.\n",
"    # Kept as a class constant rather than an __init__ argument so that the\n",
"    # component is still created as HealthcareUtilization().\n",
"    # TODO: use a pipeline, use real data\n",
"    DEMENTIA_UTILIZATION_MULTIPLIER = 10\n",
"\n",
"    ##############\n",
"    # Properties #\n",
"    ##############\n",
"\n",
"    @property\n",
"    def columns_required(self):\n",
"        \"\"\"State-table columns this component reads.\"\"\"\n",
"        return [\n",
"            \"age\",\n",
"            \"sex\",\n",
"            \"dementia\",  # TODO: refactor this to be more general (possibly using a pipeline?)\n",
"        ]\n",
"\n",
"    @property\n",
"    def initialization_requirements(self):\n",
"        \"\"\"Resources declared as required for simulant initialization.\"\"\"\n",
"        return {\n",
"            \"requires_columns\": self.columns_required,\n",
"            \"requires_values\": [],\n",
"            \"requires_streams\": [self.name],\n",
"        }\n",
"\n",
"    #####################\n",
"    # Lifecycle methods #\n",
"    #####################\n",
"\n",
"    def setup(self, builder) -> None:\n",
"        \"\"\"Fetch the clock, step size and randomness stream, build the\n",
"        utilization-rate pipeline from the artifact, and create the visit logs.\n",
"        \"\"\"\n",
"        self.clock = builder.time.clock()\n",
"        self.step_size = builder.time.step_size()\n",
"        self.randomness = builder.randomness.get_stream(self.name)\n",
"\n",
"        # Load data\n",
"        utilization_data = builder.data.load('healthcare_entity.inpatient_visits.inpatient_envelope')\n",
"        background_utilization_rate = builder.lookup.build_table(\n",
"            utilization_data, parameter_columns=[\"age\", \"year\"], key_columns=[\"sex\"]\n",
"        )\n",
"        self.background_utilization_rate = builder.value.register_rate_producer(\n",
"            \"utilization_rate\", background_utilization_rate, requires_columns=[\"age\", \"sex\"]\n",
"        )\n",
"\n",
"        # HACK: store visit dates for background and dementia visits\n",
"        self.background_visits = []\n",
"        self.dementia_visits = []\n",
"\n",
"    ########################\n",
"    # Event-driven methods #\n",
"    ########################\n",
"\n",
"    def on_time_step(self, event) -> None:\n",
"        \"\"\"Determine if someone will go for background visit or dementia visit.\"\"\"\n",
"        pop = self.population_view.get(event.index, query='alive == \"alive\"')\n",
"\n",
"        # Background visits: every living simulant, unscaled rate\n",
"        self._record_visits(\n",
"            pop,\n",
"            pop.index,\n",
"            event.time,\n",
"            rate_multiplier=1,\n",
"            additional_key=\"background_visits\",\n",
"            visit_log=self.background_visits,\n",
"        )\n",
"\n",
"        # Dementia visits: only simulants currently in the dementia state;\n",
"        # skipped entirely when there are none\n",
"        rows = pop[pop.dementia == 'dementia'].index\n",
"        if len(rows) > 0:\n",
"            self._record_visits(\n",
"                pop,\n",
"                rows,\n",
"                event.time,\n",
"                rate_multiplier=self.DEMENTIA_UTILIZATION_MULTIPLIER,\n",
"                additional_key=\"dementia_visits\",\n",
"                visit_log=self.dementia_visits,\n",
"            )\n",
"\n",
"    ##################\n",
"    # Helper methods #\n",
"    ##################\n",
"\n",
"    def _record_visits(self, pop, index, event_time, rate_multiplier, additional_key, visit_log) -> None:\n",
"        \"\"\"Sample visits among ``index`` and append them to ``visit_log``.\n",
"\n",
"        Parameters\n",
"        ----------\n",
"        pop\n",
"            Population view rows for the living simulants of this time step.\n",
"        index\n",
"            Index of the simulants eligible for this kind of visit.\n",
"        event_time\n",
"            Value stored in the ``date`` column of the logged rows.\n",
"        rate_multiplier\n",
"            Factor applied to the background utilization rate.\n",
"        additional_key\n",
"            Passed to ``filter_for_rate`` so that each kind of visit gets its\n",
"            own random draws from the shared stream.\n",
"        visit_log\n",
"            List to which a DataFrame of visits is appended; left untouched\n",
"            when nobody was sampled.\n",
"        \"\"\"\n",
"        utilization_rate = self.background_utilization_rate(index)\n",
"        if rate_multiplier != 1:\n",
"            utilization_rate = utilization_rate * rate_multiplier\n",
"\n",
"        visitors = self.randomness.filter_for_rate(\n",
"            index, utilization_rate, additional_key=additional_key\n",
"        )  # pd.Index\n",
"\n",
"        df_visits = pop.loc[visitors].copy()\n",
"        df_visits['date'] = event_time\n",
"        if len(df_visits) > 0:\n",
"            visit_log.append(df_visits)\n"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "e2d9d075",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2024-01-04 21:59:18.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36martifact_manager\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1mRunning simulation from artifact located at /share/scratch/users/abie/t.hdf.\u001b[0m\n",
"\u001b[32m2024-01-04 21:59:18.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36martifact_manager\u001b[0m:\u001b[36m68\u001b[0m - \u001b[1mArtifact base filter terms are ['draw == 0'].\u001b[0m\n",
"\u001b[32m2024-01-04 21:59:18.457\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36martifact_manager\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mArtifact additional filter terms are None.\u001b[0m\n",
"\u001b[32m2024-01-04 21:59:21.609\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36mvalues_manager\u001b[0m:\u001b[36m279\u001b[0m - \u001b[33m\u001b[1mUnsourced pipelines: ['disability_weight']\u001b[0m\n",
"\u001b[32m2024-01-04 21:59:21.613\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36mresource_manager\u001b[0m:\u001b[36m256\u001b[0m - \u001b[33m\u001b[1mResource missing_value_source.disability_weight is not provided by any component but is needed to compute (value.disability_weight).\u001b[0m\n",
"CPU times: user 3.34 s, sys: 94.6 ms, total: 3.43 s\n",
"Wall time: 4.17 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"from vivarium_public_health import population, disease\n",
"\n",
"# create healthcare utilization component first, for easy access later\n",
"healthcare_utilization = HealthcareUtilization()\n",
"\n",
"# components, in the order they are handed to the simulation\n",
"sim_components = [\n",
"    vph.population.BasePopulation(),\n",
"    vph.population.Mortality(),\n",
"    vph.disease.SI('dementia'),\n",
"    healthcare_utilization,\n",
"]\n",
"\n",
"sim_configuration = {\n",
"    'input_data': {\n",
"        'artifact_path': fname,\n",
"        'input_draw_number': 0,  # FIXME: very cryptic error when this is not included in the config dict\n",
"    },\n",
"    'time': {\n",
"        'step_size': 28,  # Days\n",
"        'start': {'year': 1990, 'month': 1, 'day': 1},\n",
"        'end': {'year': 2020, 'month': 1, 'day': 31},\n",
"    },\n",
"    'population': {\n",
"        'population_size': 50_000,\n",
"        'age_start': 0,\n",
"        'age_end': 100,\n",
"        'exit_age': 100,\n",
"    },\n",
"}\n",
"\n",
"# create a vivarium simulation with the SI dementia model and the healthcare\n",
"# utilization component; it is run in a later cell\n",
"sim = vi.InteractiveContext(\n",
"    components=sim_components,\n",
"    configuration=sim_configuration,\n",
")\n",
"\n",
"# snapshot of the population before any time step is taken\n",
"df0 = sim.get_population()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "8b5ab4f6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"susceptible_to_dementia 49559\n",
"dementia 441\n",
"Name: dementia, dtype: int64"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df0.dementia.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "eae56d3e",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bfa8280b4be040f78dfba0c892f5d10d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HTML(value=''), IntProgress(value=0, max=393)))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3min 49s, sys: 242 ms, total: 3min 50s\n",
"Wall time: 3min 49s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tracked</th>\n",
" <th>sex</th>\n",
" <th>exit_time</th>\n",
" <th>location</th>\n",
" <th>age</th>\n",
" <th>alive</th>\n",
" <th>entrance_time</th>\n",
" <th>cause_of_death</th>\n",
" <th>years_of_life_lost</th>\n",
" <th>dementia</th>\n",
" <th>susceptible_to_dementia_event_count</th>\n",
" <th>susceptible_to_dementia_event_time</th>\n",
" <th>dementia_event_time</th>\n",
" <th>dementia_event_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>True</td>\n",
" <td>Male</td>\n",
" <td>NaT</td>\n",
" <td>Singapore</td>\n",
" <td>48.370896</td>\n",
" <td>alive</td>\n",
" <td>1989-12-04</td>\n",
" <td>not_dead</td>\n",
" <td>0.000000</td>\n",
" <td>susceptible_to_dementia</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaT</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>True</td>\n",
" <td>Female</td>\n",
" <td>NaT</td>\n",
" <td>Singapore</td>\n",
" <td>66.642554</td>\n",
" <td>alive</td>\n",
" <td>1989-12-04</td>\n",
" <td>not_dead</td>\n",
" <td>0.000000</td>\n",
" <td>susceptible_to_dementia</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaT</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>True</td>\n",
" <td>Male</td>\n",
" <td>NaT</td>\n",
" <td>Singapore</td>\n",
" <td>61.370077</td>\n",
" <td>alive</td>\n",
" <td>1989-12-04</td>\n",
" <td>not_dead</td>\n",
" <td>0.000000</td>\n",
" <td>susceptible_to_dementia</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaT</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>True</td>\n",
" <td>Male</td>\n",
" <td>NaT</td>\n",
" <td>Singapore</td>\n",
" <td>81.751407</td>\n",
" <td>alive</td>\n",
" <td>1989-12-04</td>\n",
" <td>not_dead</td>\n",
" <td>0.000000</td>\n",
" <td>susceptible_to_dementia</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaT</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49996</th>\n",
" <td>True</td>\n",
" <td>Male</td>\n",
" <td>2014-10-06</td>\n",
" <td>Singapore</td>\n",
" <td>86.447725</td>\n",
" <td>dead</td>\n",
" <td>1989-12-04</td>\n",
" <td>other_causes</td>\n",
" <td>9.306914</td>\n",
" <td>susceptible_to_dementia</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaT</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49997</th>\n",
" <td>True</td>\n",
" <td>Male</td>\n",
" <td>NaT</td>\n",
" <td>Singapore</td>\n",
" <td>75.492170</td>\n",
" <td>alive</td>\n",
" <td>1989-12-04</td>\n",
" <td>not_dead</td>\n",
" <td>0.000000</td>\n",
" <td>susceptible_to_dementia</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaT</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49998</th>\n",
" <td>True</td>\n",
" <td>Female</td>\n",
" <td>NaT</td>\n",
" <td>Singapore</td>\n",
" <td>51.606382</td>\n",
" <td>alive</td>\n",
" <td>1989-12-04</td>\n",
" <td>not_dead</td>\n",
" <td>0.000000</td>\n",
" <td>susceptible_to_dementia</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaT</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49999</th>\n",
" <td>True</td>\n",
" <td>Female</td>\n",
" <td>NaT</td>\n",
" <td>Singapore</td>\n",
" <td>67.977171</td>\n",
" <td>alive</td>\n",
" <td>1989-12-04</td>\n",
" <td>not_dead</td>\n",
" <td>0.000000</td>\n",
" <td>susceptible_to_dementia</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaT</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>49808 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" tracked sex exit_time location age alive entrance_time \\\n",
"0 True Male NaT Singapore 48.370896 alive 1989-12-04 \n",
"1 True Female NaT Singapore 66.642554 alive 1989-12-04 \n",
"2 True Male NaT Singapore 61.370077 alive 1989-12-04 \n",
"3 True Male NaT Singapore 81.751407 alive 1989-12-04 \n",
"... ... ... ... ... ... ... ... \n",
"49996 True Male 2014-10-06 Singapore 86.447725 dead 1989-12-04 \n",
"49997 True Male NaT Singapore 75.492170 alive 1989-12-04 \n",
"49998 True Female NaT Singapore 51.606382 alive 1989-12-04 \n",
"49999 True Female NaT Singapore 67.977171 alive 1989-12-04 \n",
"\n",
" cause_of_death years_of_life_lost dementia \\\n",
"0 not_dead 0.000000 susceptible_to_dementia \n",
"1 not_dead 0.000000 susceptible_to_dementia \n",
"2 not_dead 0.000000 susceptible_to_dementia \n",
"3 not_dead 0.000000 susceptible_to_dementia \n",
"... ... ... ... \n",
"49996 other_causes 9.306914 susceptible_to_dementia \n",
"49997 not_dead 0.000000 susceptible_to_dementia \n",
"49998 not_dead 0.000000 susceptible_to_dementia \n",
"49999 not_dead 0.000000 susceptible_to_dementia \n",
"\n",
" susceptible_to_dementia_event_count susceptible_to_dementia_event_time \\\n",
"0 0 NaT \n",
"1 0 NaT \n",
"2 0 NaT \n",
"3 0 NaT \n",
"... ... ... \n",
"49996 0 NaT \n",
"49997 0 NaT \n",
"49998 0 NaT \n",
"49999 0 NaT \n",
"\n",
" dementia_event_time dementia_event_count \n",
"0 NaT 0 \n",
"1 NaT 0 \n",
"2 NaT 0 \n",
"3 NaT 0 \n",
"... ... ... \n",
"49996 NaT 0 \n",
"49997 NaT 0 \n",
"49998 NaT 0 \n",
"49999 NaT 0 \n",
"\n",
"[49808 rows x 14 columns]"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"\n",
"sim.run() # FIXME: progress bar is not showing up\n",
"\n",
"df1 = sim.get_population()\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "0e367af3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"susceptible_to_dementia 33855\n",
"dementia 1481\n",
"Name: dementia, dtype: int64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1[df1.alive == 'alive'].dementia.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "064a6a50",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"not_dead 35336\n",
"other_causes 13722\n",
"dementia 750\n",
"Name: cause_of_death, dtype: int64"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.cause_of_death.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "31c725c4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'base_population': BasePopulation(),\n",
" 'age_out_simulants': AgeOutSimulants(),\n",
" 'mortality': Mortality(),\n",
" 'disease_model.dementia': DiseaseModel(state_column=dementia),\n",
" 'susceptible_state.susceptible_to_dementia.None.cause': SusceptibleState(state_id=susceptible_to_dementia, side_effect_function=None, cause_type=cause),\n",
" 'transition_set.susceptible_to_dementia': TransitionSet(state_id=susceptible_to_dementia, transitions=[RateTransition(input_state=SusceptibleState(state_id=susceptible_to_dementia, side_effect_function=None, cause_type=cause), output_state=DiseaseState(state_id=dementia, side_effect_function=None, cause_type=cause))]),\n",
" \"rate_transition.'susceptible_state.susceptible_to_dementia.None.cause'.'disease_state.dementia.None.cause'\": RateTransition(input_state=SusceptibleState(state_id=susceptible_to_dementia, side_effect_function=None, cause_type=cause), output_state=DiseaseState(state_id=dementia, side_effect_function=None, cause_type=cause)),\n",
" 'disease_state.dementia.None.cause': DiseaseState(state_id=dementia, side_effect_function=None, cause_type=cause),\n",
" 'transition_set.dementia': TransitionSet(state_id=dementia, transitions=[]),\n",
" 'healthcare_utilization': HealthcareUtilization(),\n",
" 'metrics': Metrics()}"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# sim.list_components()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "dc7b54df",
"metadata": {},
"outputs": [],
"source": [
"# healthcare_utilization = sim.get_component('healthcare_utilization')"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "8342bae7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2 11423\n",
"1 11307\n",
"3 8556\n",
"4 5260\n",
" ... \n",
"13 4\n",
"14 2\n",
"16 1\n",
"15 1\n",
"Name: s_id, Length: 16, dtype: int64"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# one row per background visit, with the simulant id as an ordinary column\n",
"df = (\n",
"    pd.concat(healthcare_utilization.background_visits)\n",
"    .rename_axis('s_id')\n",
"    .reset_index()\n",
")\n",
"\n",
"# frequency of background visits per patient\n",
"df['s_id'].value_counts().value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "b3c850e5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 314\n",
"2 271\n",
"4 255\n",
"3 254\n",
" ... \n",
"47 1\n",
"52 1\n",
"54 1\n",
"59 1\n",
"Name: s_id, Length: 54, dtype: int64"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# one row per dementia visit, with the simulant id as an ordinary column\n",
"df = (\n",
"    pd.concat(healthcare_utilization.dementia_visits)\n",
"    .rename_axis('s_id')\n",
"    .reset_index()\n",
")\n",
"\n",
"# frequency of dementia visits per patient\n",
"df['s_id'].value_counts().value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "e9b70fb4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>s_id</th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>dementia</th>\n",
" <th>date</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>298</td>\n",
" <td>81.311118</td>\n",
" <td>Female</td>\n",
" <td>dementia</td>\n",
" <td>1990-01-29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1699</td>\n",
" <td>78.268114</td>\n",
" <td>Female</td>\n",
" <td>dementia</td>\n",
" <td>1990-01-29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1739</td>\n",
" <td>71.451667</td>\n",
" <td>Female</td>\n",
" <td>dementia</td>\n",
" <td>1990-01-29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1977</td>\n",
" <td>89.341515</td>\n",
" <td>Female</td>\n",
" <td>dementia</td>\n",
" <td>1990-01-29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37203</th>\n",
" <td>48966</td>\n",
" <td>72.428198</td>\n",
" <td>Male</td>\n",
" <td>dementia</td>\n",
" <td>2020-02-17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37204</th>\n",
" <td>48972</td>\n",
" <td>87.912832</td>\n",
" <td>Female</td>\n",
" <td>dementia</td>\n",
" <td>2020-02-17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37205</th>\n",
" <td>49163</td>\n",
" <td>78.288977</td>\n",
" <td>Female</td>\n",
" <td>dementia</td>\n",
" <td>2020-02-17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37206</th>\n",
" <td>49939</td>\n",
" <td>91.829400</td>\n",
" <td>Male</td>\n",
" <td>dementia</td>\n",
" <td>2020-02-17</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>37207 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" s_id age sex dementia date\n",
"0 298 81.311118 Female dementia 1990-01-29\n",
"1 1699 78.268114 Female dementia 1990-01-29\n",
"2 1739 71.451667 Female dementia 1990-01-29\n",
"3 1977 89.341515 Female dementia 1990-01-29\n",
"... ... ... ... ... ...\n",
"37203 48966 72.428198 Male dementia 2020-02-17\n",
"37204 48972 87.912832 Female dementia 2020-02-17\n",
"37205 49163 78.288977 Female dementia 2020-02-17\n",
"37206 49939 91.829400 Male dementia 2020-02-17\n",
"\n",
"[37207 rows x 5 columns]"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2fb020d",
"metadata": {},
"outputs": [],
"source": [
"# check that only living simulants go to the hospital"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "vivarium_nih_us_cvd",
"language": "python",
"name": "vivarium_nih_us_cvd"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment