aflaxman · January 5, 2024 17:54
diff --git a/2024_01_04a_vph_dementia_dismod_ipd_sim.ipynb b/2024_01_04a_vph_dementia_dismod_ipd_sim.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e02ba2a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.simplefilter(action='ignore', category=FutureWarning)  # it would be great to update vivarium to make this unnecessary!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "11db1f55",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Thu Jan  4 21:02:12 PST 2024\r\n"
     ]
    }
   ],
   "source": [
    "import numpy as np, matplotlib.pyplot as plt, pandas as pd\n",
    "pd.set_option('display.max_rows', 8)\n",
    "!date"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "306fe3c0",
   "metadata": {},
   "source": [
    "# Simulate billing data for DisMod-IPD\n",
    "\n",
    "Extending dementia sim I prototyped last week."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6a536915",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Config: 'input_data:\n",
      "    intermediary_data_cache_path:\n",
      "        user_configs: /ihme/scratch/tmp/gbd_2017_cache\n",
      "    cache_data:\n",
      "        base: True'\n",
      "Cache Dir: '/ihme/scratch/tmp/gbd_2017_cache'\n"
     ]
    }
   ],
   "source": [
    "# the Artifact class is defined in the base vivarium module\n",
    "import vivarium as vi\n",
    "\n",
    "# for creating the population and disease model components of the sim we will use vivarium_public_health\n",
    "import vivarium_public_health as vph\n",
    "\n",
    "# and to get the data for the artifact, we will use vivarium_inputs and gbd_mapping\n",
    "# note that vivarium_inputs only works when on the IHME VPN at present\n",
    "import vivarium_inputs as vii, gbd_mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "be49200b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/vivarium/framework/artifact/artifact.py:69: UserWarning: No artifact found at /share/scratch/users/abie/t.hdf. Building new artifact.\n",
      "  warnings.warn(f\"No artifact found at {path}. Building new artifact.\")\n"
     ]
    }
   ],
   "source": [
    "# for this example, we will create a new artifact from scratch\n",
    "fname = '/share/scratch/users/abie/t.hdf'\n",
    "!rm $fname\n",
    "art = vi.Artifact(fname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "9b8a21f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# It would be cool to extend vivarium_inputs methods to accept a list of locations.\n",
    "# Currently, it is necessary to create a separate artifact for each location of interest.\n",
    "\n",
    "location = 'Singapore'\n",
    "art.write('population.location', location)\n",
    "\n",
    "df = vii.get_population_structure(location)\n",
    "art.write('population.structure', df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b3df805b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculating DALYs uses the theoretical minimum risk life expectancy (TMREL)\n",
    "# and the mortality component in vivarium_public_health expects this to be\n",
    "# in the artifact as well\n",
    "\n",
    "df = vii.get_theoretical_minimum_risk_life_expectancy()\n",
    "art.write('population.theoretical_minimum_risk_life_expectancy', df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "84f0f141",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-01-04 21:03:54.402\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mvivarium_inputs.validation.raw\u001b[0m:\u001b[36mcheck_columns\u001b[0m:\u001b[36m1994\u001b[0m - \u001b[33m\u001b[1mData returned extra columns: {'version_id'}.\u001b[0m\n",
      "/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/vivarium_inputs/utilities.py:477: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  data[f\"{split_column_prefix}_end\"] = [\n"
     ]
    }
   ],
   "source": [
    "# All-cause mortality is an important model parameter\n",
    "# and vivarium_public_health includes components to\n",
    "# work with it, which use the all-cause mortality rate\n",
    "# from the artifact with the key cause.all_causes.cause_specific_mortality_rate\n",
    "\n",
    "df = vii.get_measure(gbd_mapping.causes.all_causes, 'cause_specific_mortality_rate', location)\n",
    "art.write('cause.all_causes.cause_specific_mortality_rate', df.loc[location])  # TODO: it is annoying that the location needs to be stripped from the dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "166336ff",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# vivarium_public_health also includes disease models, such as\n",
    "# vph.SI and vph.SIS that work for conditions modeled in the GBD.\n",
    "# There is a list of these in the gbd_mapping.causes object\n",
    "#\n",
    "# vph.SI uses five epidemiological parameters, named to match the\n",
    "# name used to create the vph.SI component in the model specification.\n",
    "#\n",
    "# For example, we will use vph.disease.SI('dementia') in our model\n",
    "# below, which will require that the artifact contain data for the\n",
    "# key cause.dementia.incidence_rate and four other parameters\n",
    "\n",
    "cause = gbd_mapping.causes.alzheimers_disease_and_other_dementias\n",
    "for measure in ['incidence_rate',\n",
    "                'cause_specific_mortality_rate',\n",
    "                'prevalence',\n",
    "                'disability_weight',\n",
    "                'excess_mortality_rate',\n",
    "               ]:\n",
    "    df = vii.get_measure(cause, measure, location)\n",
    "    key = f'cause.dementia.{measure}'\n",
    "    art.write(key, df.loc[location]) # TODO: it is annoying that the location needs to be stripped from the dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "0740cf1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# the vph.SI cause model also needs data on \"restrictions\" about the disease\n",
    "# (e.g. Cervical Cancer occurs only in Females) which must be stored in the\n",
    "# artifact as well.\n",
    "\n",
    "key = 'cause.dementia.restrictions'\n",
    "restriction_dict = cause.restrictions.to_dict()\n",
    "art.write(key, restriction_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "66b1738b",
   "metadata": {},
   "source": [
    "# Add inpatient envelope to artifact\n",
    "\n",
    "Adapted from `vivarium_nih_us_cvd`; would be nice to fix `vivarium_inputs` so that this was easier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "3f844f5b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/get_draws/api.py:313: DeprecationWarning: In GBD 2022, the arguments gbd_round_id and decomp_step will be removed. Switch to using release_id exclusively. Every GBD round and decomp step combination has an equivalent release_id: https://hub.ihme.washington.edu/x/vS7NCQ\n",
      "  release_helpers.validate_decomp_step_and_release_id(\n",
      "/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/get_draws/base/utils.py:30: FutureWarning: In a future version of pandas, a length 1 tuple will be returned when iterating over a groupby with a grouper equal to a list of length 1. Don't supply a list with a single grouper to avoid this warning.\n",
      "  for name, group in df.groupby([group_cols]):\n",
      "/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/get_draws/base/utils.py:30: FutureWarning: In a future version of pandas, a length 1 tuple will be returned when iterating over a groupby with a grouper equal to a list of length 1. Don't supply a list with a single grouper to avoid this warning.\n",
      "  for name, group in df.groupby([group_cols]):\n",
      "/homes/abie/.conda/envs/vivarium_nih_us_cvd/lib/python3.11/site-packages/vivarium_inputs/utilities.py:477: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
      "  data[f\"{split_column_prefix}_end\"] = [\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "key = 'healthcare_entity.inpatient_visits.inpatient_envelope'\n",
    "\n",
    "location_id = vii.utility_data.get_location_id(location)\n",
    "entity = vii.mapping_extension.healthcare_entities.inpatient_visits\n",
    "\n",
    "\n",
    "# df = vii.core.get_utilization_rate(entity, location_id)\n",
    "from vivarium_gbd_access.utilities import get_draws\n",
    "from vivarium_gbd_access.constants import ROUND_IDS, SEX, SOURCES\n",
    "from vivarium_gbd_access import gbd\n",
    "from vivarium_inputs import utilities as vi_utils\n",
    "from vivarium_inputs import globals as vi_globals\n",
    "\n",
    "# vivarium_inputs.core.get_utilization_rate() breaks with the hard-coded\n",
    "# gbd_round_id=6; use gbd_round_id=5.\n",
    "# TODO: SDB fix in vivarium_gbd_access.gbd.get_modelable_entity_draws()?\n",
    "data = get_draws(\n",
    "    gbd_id_type=\"modelable_entity_id\",\n",
    "    gbd_id=entity.gbd_id,\n",
    "    source=SOURCES.EPI,\n",
    "    location_id=location_id,\n",
    "    sex_id=SEX.MALE + SEX.FEMALE,\n",
    "    age_group_id=gbd.get_age_group_id(),\n",
    "    gbd_round_id=ROUND_IDS.GBD_2017,\n",
    "    status=\"best\",\n",
    ")\n",
    "# Fill in year gaps manually. vi_utils.normalize does not quite work because\n",
    "# the data is missing required age_bin edges 2015 and 2019. Instead, let's\n",
    "# assume 2018 and 2019 is the same as 2017 and interpolate everything else\n",
    "tmp = data[data[\"year_id\"] == 2017]\n",
    "for year in [2018, 2019]:\n",
    "    tmp[\"year_id\"] = year\n",
    "    data = pd.concat([data, tmp], axis=0)\n",
    "data = vi_utils.interpolate_year(data)\n",
    "\n",
    "# Cleanup\n",
    "data = vi_utils.normalize(data, fill_value=0)\n",
    "data = data.filter(vi_globals.DEMOGRAPHIC_COLUMNS + vi_globals.DRAW_COLUMNS)\n",
    "data = vi_utils.reshape(data)\n",
    "data = vi_utils.scrub_gbd_conventions(data, location)\n",
    "data = vi_utils.split_interval(data, interval_column=\"age\", split_column_prefix=\"age\")\n",
    "data = vi_utils.split_interval(data, interval_column=\"year\", split_column_prefix=\"year\")\n",
    "data = vi_utils.sort_hierarchical_data(data).droplevel(\"location\")\n",
    "\n",
    "art.write(key, data)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25855ba2",
   "metadata": {},
   "source": [
    "# With this minimal artifact in hand, run a sim for dementia\n",
    "\n",
    "And include a custom component that has simulants generate data on visits to the hospital"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "432cc67e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from vivarium import Component\n",
    "\n",
    "class HealthcareUtilization(Component):\n",
    "    \"\"\"Manages healthcare utilization\"\"\"\n",
    "\n",
    "    ##############\n",
    "    # Properties #\n",
    "    ##############\n",
    "\n",
    "    @property\n",
    "    def columns_required(self):\n",
    "        return [\n",
    "            \"age\",\n",
    "            \"sex\",\n",
    "            \"dementia\", # TODO: refactor this to be more general (possibly using a pipeline?)\n",
    "        ]\n",
    "\n",
    "    @property\n",
    "    def initialization_requirements(self):\n",
    "        return {\n",
    "            \"requires_columns\": self.columns_required,\n",
    "            \"requires_values\": [\n",
    "            ],\n",
    "            \"requires_streams\": [self.name],\n",
    "        }\n",
    "\n",
    "    #####################\n",
    "    # Lifecycle methods #\n",
    "    #####################\n",
    "    \n",
    "    def setup(self, builder) -> None:\n",
    "        self.clock = builder.time.clock()\n",
    "        self.step_size = builder.time.step_size()\n",
    "        self.randomness = builder.randomness.get_stream(self.name)\n",
    "\n",
    "        # Load data\n",
    "        utilization_data = builder.data.load('healthcare_entity.inpatient_visits.inpatient_envelope')\n",
    "        background_utilization_rate = builder.lookup.build_table(\n",
    "            utilization_data, parameter_columns=[\"age\", \"year\"], key_columns=[\"sex\"]\n",
    "        )\n",
    "        self.background_utilization_rate = builder.value.register_rate_producer(\n",
    "            \"utilization_rate\", background_utilization_rate, requires_columns=[\"age\", \"sex\"]\n",
    "        )\n",
    "        \n",
    "        # HACK: store visit dates for background and dementia visits\n",
    "        self.background_visits = []\n",
    "        self.dementia_visits = []\n",
    "\n",
    "    ########################\n",
    "    # Event-driven methods #\n",
    "    ########################\n",
    "\n",
    "    def on_time_step(self, event) -> None:\n",
    "        \"\"\"Determine if someone will go for background visit or dementia visit.\n",
    "        \"\"\"\n",
    "        event_time = event.time\n",
    "        pop = self.population_view.get(event.index, query='alive == \"alive\"')\n",
    "\n",
    "        # Background visits\n",
    "        utilization_rate = self.background_utilization_rate(pop.index)\n",
    "        visit_background = self.randomness.filter_for_rate(\n",
    "            pop.index, utilization_rate, additional_key=\"background_visits\"\n",
    "        )  # pd.Index\n",
    "\n",
    "        df_visits = pop.loc[visit_background].copy()\n",
    "        df_visits['date'] = event_time\n",
    "        if len(df_visits) > 0:\n",
    "            self.background_visits.append(df_visits)\n",
    "\n",
    "        # dementia visits\n",
    "        rows = pop[pop.dementia == 'dementia'].index\n",
    "        if len(rows) > 0:\n",
    "            utilization_rate = self.background_utilization_rate(rows)\n",
    "            utilization_rate *= 10  # TODO: use a pipeline, use real data\n",
    "\n",
    "            visit_dementia = self.randomness.filter_for_rate(\n",
    "                rows, utilization_rate, additional_key=\"dementia_visits\"\n",
    "            )  # pd.Index\n",
    "\n",
    "            df_visits = pop.loc[visit_dementia].copy()\n",
    "            df_visits['date'] = event_time\n",
    "            if len(df_visits) > 0:\n",
    "                self.dementia_visits.append(df_visits)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "e2d9d075",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-01-04 21:59:18.453\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36martifact_manager\u001b[0m:\u001b[36m67\u001b[0m - \u001b[1mRunning simulation from artifact located at /share/scratch/users/abie/t.hdf.\u001b[0m\n",
      "\u001b[32m2024-01-04 21:59:18.454\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36martifact_manager\u001b[0m:\u001b[36m68\u001b[0m - \u001b[1mArtifact base filter terms are ['draw == 0'].\u001b[0m\n",
      "\u001b[32m2024-01-04 21:59:18.457\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36martifact_manager\u001b[0m:\u001b[36m69\u001b[0m - \u001b[1mArtifact additional filter terms are None.\u001b[0m\n",
      "\u001b[32m2024-01-04 21:59:21.609\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36mvalues_manager\u001b[0m:\u001b[36m279\u001b[0m - \u001b[33m\u001b[1mUnsourced pipelines: ['disability_weight']\u001b[0m\n",
      "\u001b[32m2024-01-04 21:59:21.613\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36msimulation_5\u001b[0m-\u001b[36mresource_manager\u001b[0m:\u001b[36m256\u001b[0m - \u001b[33m\u001b[1mResource missing_value_source.disability_weight is not provided by any component but is needed to compute (value.disability_weight).\u001b[0m\n",
      "CPU times: user 3.34 s, sys: 94.6 ms, total: 3.43 s\n",
      "Wall time: 4.17 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "from vivarium_public_health import population, disease\n",
    "\n",
    "# create healthcare utilization component first, for easy access later\n",
    "healthcare_utilization = HealthcareUtilization()\n",
    "\n",
    "# create a vivarium simulation with this SIR component, and run it\n",
    "sim = vi.InteractiveContext(\n",
    "    components=[\n",
    "        vph.population.BasePopulation(),\n",
    "        vph.population.Mortality(),\n",
    "        vph.disease.SI('dementia'),\n",
    "        healthcare_utilization,\n",
    "    ],\n",
    "    configuration={'input_data': {'artifact_path': fname,\n",
    "                                  'input_draw_number': 0,  # FIXME: very cryptic error when this is not included in the config dict\n",
    "                                 },\n",
    "                   'time': {'step_size': 28, # Days\n",
    "                            'start': {\n",
    "                                'year': 1990,\n",
    "                                'month': 1,\n",
    "                                'day': 1,},\n",
    "                            'end': {\n",
    "                                'year': 2020,\n",
    "                                'month': 1,\n",
    "                                'day': 31,\n",
    "                           },\n",
    "                      },\n",
    "                    'population': {'population_size': 50_000,\n",
    "                                   'age_start': 0,\n",
    "                                   'age_end': 100,\n",
    "                                   'exit_age': 100,\n",
    "                                  }\n",
    "                  },\n",
    ")\n",
    "df0 = sim.get_population()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "8b5ab4f6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "susceptible_to_dementia    49559\n",
       "dementia                     441\n",
       "Name: dementia, dtype: int64"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df0.dementia.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "eae56d3e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bfa8280b4be040f78dfba0c892f5d10d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HTML(value=''), IntProgress(value=0, max=393)))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3min 49s, sys: 242 ms, total: 3min 50s\n",
      "Wall time: 3min 49s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tracked</th>\n",
       "      <th>sex</th>\n",
       "      <th>exit_time</th>\n",
       "      <th>location</th>\n",
       "      <th>age</th>\n",
       "      <th>alive</th>\n",
       "      <th>entrance_time</th>\n",
       "      <th>cause_of_death</th>\n",
       "      <th>years_of_life_lost</th>\n",
       "      <th>dementia</th>\n",
       "      <th>susceptible_to_dementia_event_count</th>\n",
       "      <th>susceptible_to_dementia_event_time</th>\n",
       "      <th>dementia_event_time</th>\n",
       "      <th>dementia_event_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>True</td>\n",
       "      <td>Male</td>\n",
       "      <td>NaT</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>48.370896</td>\n",
       "      <td>alive</td>\n",
       "      <td>1989-12-04</td>\n",
       "      <td>not_dead</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>susceptible_to_dementia</td>\n",
       "      <td>0</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>True</td>\n",
       "      <td>Female</td>\n",
       "      <td>NaT</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>66.642554</td>\n",
       "      <td>alive</td>\n",
       "      <td>1989-12-04</td>\n",
       "      <td>not_dead</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>susceptible_to_dementia</td>\n",
       "      <td>0</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>True</td>\n",
       "      <td>Male</td>\n",
       "      <td>NaT</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>61.370077</td>\n",
       "      <td>alive</td>\n",
       "      <td>1989-12-04</td>\n",
       "      <td>not_dead</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>susceptible_to_dementia</td>\n",
       "      <td>0</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>True</td>\n",
       "      <td>Male</td>\n",
       "      <td>NaT</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>81.751407</td>\n",
       "      <td>alive</td>\n",
       "      <td>1989-12-04</td>\n",
       "      <td>not_dead</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>susceptible_to_dementia</td>\n",
       "      <td>0</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49996</th>\n",
       "      <td>True</td>\n",
       "      <td>Male</td>\n",
       "      <td>2014-10-06</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>86.447725</td>\n",
       "      <td>dead</td>\n",
       "      <td>1989-12-04</td>\n",
       "      <td>other_causes</td>\n",
       "      <td>9.306914</td>\n",
       "      <td>susceptible_to_dementia</td>\n",
       "      <td>0</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49997</th>\n",
       "      <td>True</td>\n",
       "      <td>Male</td>\n",
       "      <td>NaT</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>75.492170</td>\n",
       "      <td>alive</td>\n",
       "      <td>1989-12-04</td>\n",
       "      <td>not_dead</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>susceptible_to_dementia</td>\n",
       "      <td>0</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49998</th>\n",
       "      <td>True</td>\n",
       "      <td>Female</td>\n",
       "      <td>NaT</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>51.606382</td>\n",
       "      <td>alive</td>\n",
       "      <td>1989-12-04</td>\n",
       "      <td>not_dead</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>susceptible_to_dementia</td>\n",
       "      <td>0</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49999</th>\n",
       "      <td>True</td>\n",
       "      <td>Female</td>\n",
       "      <td>NaT</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>67.977171</td>\n",
       "      <td>alive</td>\n",
       "      <td>1989-12-04</td>\n",
       "      <td>not_dead</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>susceptible_to_dementia</td>\n",
       "      <td>0</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaT</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>49808 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       tracked     sex  exit_time   location        age  alive entrance_time  \\\n",
       "0         True    Male        NaT  Singapore  48.370896  alive    1989-12-04   \n",
       "1         True  Female        NaT  Singapore  66.642554  alive    1989-12-04   \n",
       "2         True    Male        NaT  Singapore  61.370077  alive    1989-12-04   \n",
       "3         True    Male        NaT  Singapore  81.751407  alive    1989-12-04   \n",
       "...        ...     ...        ...        ...        ...    ...           ...   \n",
       "49996     True    Male 2014-10-06  Singapore  86.447725   dead    1989-12-04   \n",
       "49997     True    Male        NaT  Singapore  75.492170  alive    1989-12-04   \n",
       "49998     True  Female        NaT  Singapore  51.606382  alive    1989-12-04   \n",
       "49999     True  Female        NaT  Singapore  67.977171  alive    1989-12-04   \n",
       "\n",
       "      cause_of_death  years_of_life_lost                 dementia  \\\n",
       "0           not_dead            0.000000  susceptible_to_dementia   \n",
       "1           not_dead            0.000000  susceptible_to_dementia   \n",
       "2           not_dead            0.000000  susceptible_to_dementia   \n",
       "3           not_dead            0.000000  susceptible_to_dementia   \n",
       "...              ...                 ...                      ...   \n",
       "49996   other_causes            9.306914  susceptible_to_dementia   \n",
       "49997       not_dead            0.000000  susceptible_to_dementia   \n",
       "49998       not_dead            0.000000  susceptible_to_dementia   \n",
       "49999       not_dead            0.000000  susceptible_to_dementia   \n",
       "\n",
       "       susceptible_to_dementia_event_count susceptible_to_dementia_event_time  \\\n",
       "0                                        0                                NaT   \n",
       "1                                        0                                NaT   \n",
       "2                                        0                                NaT   \n",
       "3                                        0                                NaT   \n",
       "...                                    ...                                ...   \n",
       "49996                                    0                                NaT   \n",
       "49997                                    0                                NaT   \n",
       "49998                                    0                                NaT   \n",
       "49999                                    0                                NaT   \n",
       "\n",
       "      dementia_event_time  dementia_event_count  \n",
       "0                     NaT                     0  \n",
       "1                     NaT                     0  \n",
       "2                     NaT                     0  \n",
       "3                     NaT                     0  \n",
       "...                   ...                   ...  \n",
       "49996                 NaT                     0  \n",
       "49997                 NaT                     0  \n",
       "49998                 NaT                     0  \n",
       "49999                 NaT                     0  \n",
       "\n",
       "[49808 rows x 14 columns]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "sim.run()  # FIXME: progress bar is not showing up\n",
    "\n",
    "df1 = sim.get_population()\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "0e367af3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "susceptible_to_dementia    33855\n",
       "dementia                    1481\n",
       "Name: dementia, dtype: int64"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1[df1.alive == 'alive'].dementia.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "064a6a50",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "not_dead        35336\n",
       "other_causes    13722\n",
       "dementia          750\n",
       "Name: cause_of_death, dtype: int64"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.cause_of_death.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "31c725c4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'base_population': BasePopulation(),\n",
       " 'age_out_simulants': AgeOutSimulants(),\n",
       " 'mortality': Mortality(),\n",
       " 'disease_model.dementia': DiseaseModel(state_column=dementia),\n",
       " 'susceptible_state.susceptible_to_dementia.None.cause': SusceptibleState(state_id=susceptible_to_dementia, side_effect_function=None, cause_type=cause),\n",
       " 'transition_set.susceptible_to_dementia': TransitionSet(state_id=susceptible_to_dementia, transitions=[RateTransition(input_state=SusceptibleState(state_id=susceptible_to_dementia, side_effect_function=None, cause_type=cause), output_state=DiseaseState(state_id=dementia, side_effect_function=None, cause_type=cause))]),\n",
       " \"rate_transition.'susceptible_state.susceptible_to_dementia.None.cause'.'disease_state.dementia.None.cause'\": RateTransition(input_state=SusceptibleState(state_id=susceptible_to_dementia, side_effect_function=None, cause_type=cause), output_state=DiseaseState(state_id=dementia, side_effect_function=None, cause_type=cause)),\n",
       " 'disease_state.dementia.None.cause': DiseaseState(state_id=dementia, side_effect_function=None, cause_type=cause),\n",
       " 'transition_set.dementia': TransitionSet(state_id=dementia, transitions=[]),\n",
       " 'healthcare_utilization': HealthcareUtilization(),\n",
       " 'metrics': Metrics()}"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sim.list_components()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "dc7b54df",
   "metadata": {},
   "outputs": [],
   "source": [
    "# look the utilization component up in the sim by its registered name, so the visit\n",
    "# summaries below do not rely on a variable bound in an earlier cell\n",
    "healthcare_utilization = sim.get_component('healthcare_utilization')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "8342bae7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2     11423\n",
       "1     11307\n",
       "3      8556\n",
       "4      5260\n",
       "      ...  \n",
       "13        4\n",
       "14        2\n",
       "16        1\n",
       "15        1\n",
       "Name: s_id, Length: 16, dtype: int64"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.concat(healthcare_utilization.background_visits)\n",
    "df.index.name = 's_id'\n",
    "df = df.reset_index()\n",
    "df.s_id.value_counts().value_counts() # frequency of background visits per patient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "b3c850e5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1     314\n",
       "2     271\n",
       "4     255\n",
       "3     254\n",
       "     ... \n",
       "47      1\n",
       "52      1\n",
       "54      1\n",
       "59      1\n",
       "Name: s_id, Length: 54, dtype: int64"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.concat(healthcare_utilization.dementia_visits)\n",
    "df.index.name = 's_id'\n",
    "df = df.reset_index()\n",
    "df.s_id.value_counts().value_counts() # frequency of dementia visits per patient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "e9b70fb4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>s_id</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>dementia</th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>298</td>\n",
       "      <td>81.311118</td>\n",
       "      <td>Female</td>\n",
       "      <td>dementia</td>\n",
       "      <td>1990-01-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1699</td>\n",
       "      <td>78.268114</td>\n",
       "      <td>Female</td>\n",
       "      <td>dementia</td>\n",
       "      <td>1990-01-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1739</td>\n",
       "      <td>71.451667</td>\n",
       "      <td>Female</td>\n",
       "      <td>dementia</td>\n",
       "      <td>1990-01-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1977</td>\n",
       "      <td>89.341515</td>\n",
       "      <td>Female</td>\n",
       "      <td>dementia</td>\n",
       "      <td>1990-01-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37203</th>\n",
       "      <td>48966</td>\n",
       "      <td>72.428198</td>\n",
       "      <td>Male</td>\n",
       "      <td>dementia</td>\n",
       "      <td>2020-02-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37204</th>\n",
       "      <td>48972</td>\n",
       "      <td>87.912832</td>\n",
       "      <td>Female</td>\n",
       "      <td>dementia</td>\n",
       "      <td>2020-02-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37205</th>\n",
       "      <td>49163</td>\n",
       "      <td>78.288977</td>\n",
       "      <td>Female</td>\n",
       "      <td>dementia</td>\n",
       "      <td>2020-02-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37206</th>\n",
       "      <td>49939</td>\n",
       "      <td>91.829400</td>\n",
       "      <td>Male</td>\n",
       "      <td>dementia</td>\n",
       "      <td>2020-02-17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>37207 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        s_id        age     sex  dementia       date\n",
       "0        298  81.311118  Female  dementia 1990-01-29\n",
       "1       1699  78.268114  Female  dementia 1990-01-29\n",
       "2       1739  71.451667  Female  dementia 1990-01-29\n",
       "3       1977  89.341515  Female  dementia 1990-01-29\n",
       "...      ...        ...     ...       ...        ...\n",
       "37203  48966  72.428198    Male  dementia 2020-02-17\n",
       "37204  48972  87.912832  Female  dementia 2020-02-17\n",
       "37205  49163  78.288977  Female  dementia 2020-02-17\n",
       "37206  49939  91.829400    Male  dementia 2020-02-17\n",
       "\n",
       "[37207 rows x 5 columns]"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2fb020d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: check that only living simulants go to the hospital"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "vivarium_nih_us_cvd",
   "language": "python",
   "name": "vivarium_nih_us_cvd"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }