Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

Jupyter notebook 2-Digging into participation, utilization, cost, and quality.ipynb

51 views
Kernel: Python 2 (SageMath)

Notebook #2: Digging into participation, utilization, cost, and quality

This notebook...

  • looks at which CMS programs the cardiologists participate in

  • brings in the CMS Aggregate Utilization and Cost data for CY2014

  • starts to compare the 8 given cardiologists to the population of 5,480 CA cardiologists we have Medicare data on

import numpy as np
import pandas as pd

# Remove the cap on how many columns pandas shows when displaying a frame
pd.set_option('display.max_columns', None)

# Let's load the cardiologist file again
given_cardiologists = pd.read_csv(
    "data/given_cardiologists_plus_national_downloadable_file.csv"
)

# The CMS program participation flags we want to tabulate
cms_program_cols = [
    'Participating in eRx',
    'Participating in PQRS',
    'Participating in EHR',
    'Received PQRS Maintenance of Certification Program Incentive',
    'Participated in Million Hearts',
]

# Swap every NaN in the frame (in place) for the string "NULL" --
# presumably so missing flags are counted as their own group below
given_cardiologists.replace(np.nan, "NULL", inplace=True)

# Number of cardiologists for each combination of program flags
given_cardiologists.groupby(by=cms_program_cols).size()
Participating in eRx Participating in PQRS Participating in EHR Received PQRS Maintenance of Certification Program Incentive Participated in Million Hearts NULL NULL NULL NULL NULL 2 Y Y NULL NULL 6 dtype: int64

Analysis of CMS program participation

  • We can see that none of the cardiologists participate in eRx or Million Hearts, and none have received the PQRS Maintenance of Certification Program incentive.

  • 6 of the 8 physicians are participating in both the PQRS and EHR incentive programs -- we may want to prioritize these six

# Let's now look at the aggregate provider utilization data from CMS
# (source #3 in README.md)
all_provider_agg_util = pd.read_csv(
    "data/data.cms.gov_4a3h-46r6_CY2015-Aggregate-Utilization.csv",
    low_memory=False,
)

# Left-join the aggregate utilization rows onto the given cardiologists by NPI,
# producing a new dataframe; overlapping column names get the suffix '_ndf'
# (left side) or '_agg' (right side)
given_cardiologists_plus_agg_util = given_cardiologists.merge(
    all_provider_agg_util,
    how='left',
    left_on='npi',
    right_on='National Provider Identifier',
    suffixes=('_ndf', '_agg'),
)

# write this merge to the file system for safe keeping
given_cardiologists_plus_agg_util.to_csv("data/given_cardiologists_plus_agg_util.csv")

# we may want to look at all cardiologists' aggregate information, so let's
# bring in the original source #1 dataset and join it with this one as well
all_providers = pd.read_csv(
    "data/data.medicare.gov_s63f-csi6_National-Downloadable_File.csv",
    low_memory=False,
)

# Narrow down in two steps: cardiology as primary specialty, then California
all_cardiologists = all_providers[
    all_providers['Primary specialty'] == 'CARDIOVASCULAR DISEASE (CARDIOLOGY)'
]
all_cardiologists_in_ca = all_cardiologists[all_cardiologists['State'] == 'CA']

# Same left join as above, this time for every CA cardiologist
all_cardiologists_in_ca_plus_agg_util = all_cardiologists_in_ca.merge(
    all_provider_agg_util,
    how='left',
    left_on='npi',
    right_on='National Provider Identifier',
    suffixes=('_ndf', '_agg'),
)

# So what data do we have to work with now?
given_cardiologists_plus_agg_util

Analysis of given cardiologists and all CA cardiologists

# How many CA cardiologists can we compare these 8 with?
# NOTE: print is called with a single parenthesized argument so the output is
# the same under the Python 2 print statement and the Python 3 print function.
print(
    "There are {0} cardiologists in CA that we will compare these {1} with".format(
        len(all_cardiologists_in_ca_plus_agg_util),
        len(given_cardiologists_plus_agg_util)
    )
)
There are 5480 cardiologists in CA that we will compare these 8 with
# We will use plotly for interactive charting and visual analytics
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools


# helper method for creating hover over tooltips/labels
def list_of_provider_labels(df):
    """Return one hover label per row of ``df``, in row order.

    ``df`` must have 'npi', 'Last Name' and 'First Name' columns. Each label
    has the form ``"NPI: <npi><br />Name: <last name>, <first name>"``.
    """
    # Walk the three columns in lockstep instead of using iterrows(): this
    # avoids building a Series per row and keeps each column's own dtype.
    return [
        "NPI: {0}<br />Name: {1}, {2}".format(npi, last_name, first_name)
        for npi, last_name, first_name
        in zip(df['npi'], df['Last Name'], df['First Name'])
    ]


# given one df, this produces a figure with a boxplot and a scatterplot
## Example usage:
##   fig = plotly_box_and_scatter_plot(given_cardiologists_plus_agg_util, 'Graduation year')
##   py.iplot(fig)
def plotly_box_and_scatter_plot(df, column_to_graph):
    """Build a figure holding a box trace and a marker scatter of one column.

    Parameters:
        df: dataframe with ``column_to_graph`` plus the columns needed by
            ``list_of_provider_labels``.
        column_to_graph: name of the column to plot; also used as the title.

    Returns:
        A ``go.Figure`` with the legend hidden and 'closest' hover mode.
    """
    values = df[column_to_graph]
    data = [
        go.Box(
            name=column_to_graph,
            y=values,
            boxmean=True
        ),
        go.Scatter(
            # every marker sits at x == 0; only the y value varies
            x=np.full(len(df), 0),
            y=values.tolist(),
            mode='markers',
            text=list_of_provider_labels(df)
        )
    ]
    layout = go.Layout(
        showlegend=False,
        hovermode='closest',
        title="{0}".format(column_to_graph)
    )
    return go.Figure(data=data, layout=layout)


def _box_and_scatter_traces(df, name, column_to_graph):
    """Return the (box, scatter) trace pair for ``column_to_graph`` of ``df``, both named ``name``."""
    values = df[column_to_graph]
    box = go.Box(
        name=name,
        y=values,
        boxmean=True
    )
    scatter = go.Scatter(
        name=name,
        x=np.full(len(df), 0),
        y=values.tolist(),
        mode='markers',
        text=list_of_provider_labels(df)
    )
    return box, scatter


# given two dfs, this produces a four-column layout:
#   [given boxplot | given scatterplot | all boxplot | all scatterplot]
## Example usage:
##   fig = plotly_compare_distribution_of_column(
##       column_to_graph = 'Percent (%) of Beneficiaries Identified With Ischemic Heart Disease ',
##       given_name = 'Nearby Cardios', given_df = given_cardiologists_plus_agg_util,
##       all_name = 'All CA Cardios', all_df = all_cardiologists_in_ca_plus_agg_util,
##       y_min = 0, y_max = 100  # these are optional!
##   )
##   py.iplot(fig)
def plotly_compare_distribution_of_column(column_to_graph, given_df, given_name, all_df, all_name, y_min=None, y_max=None):
    """Build a 1x4 subplot figure comparing one column across two dataframes.

    Parameters:
        column_to_graph: column present in both dataframes; also the title.
        given_df, given_name: the smaller group and the name for its traces.
        all_df, all_name: the comparison population and the name for its traces.
        y_min, y_max: optional fixed y-axis range. The range is applied to all
            four y axes only when BOTH are given; if either is None the axes
            keep their default ranges.

    Returns:
        The figure created by ``tools.make_subplots(rows=1, cols=4)`` with the
        four traces appended in the order given-box, given-scatter, all-box,
        all-scatter.
    """
    given_box, given_scatter = _box_and_scatter_traces(given_df, given_name, column_to_graph)
    all_box, all_scatter = _box_and_scatter_traces(all_df, all_name, column_to_graph)

    fig = tools.make_subplots(rows=1, cols=4)
    traces_in_column_order = [given_box, given_scatter, all_box, all_scatter]
    for col_number, trace in enumerate(traces_in_column_order, 1):
        fig.append_trace(trace, 1, col_number)

    fig['layout'].update(title=column_to_graph, showlegend=True, hovermode='closest')

    # the scatter columns (2 and 4) get a titled category x axis with no ticks
    fig['layout']['xaxis2'].update(title=given_name, type='category', ticks='', showticklabels=False)
    fig['layout']['xaxis4'].update(title=all_name, type='category', ticks='', showticklabels=False)

    # if y_min and y_max are both non-None, set all axes to the specified min and max
    if y_min is not None and y_max is not None:
        y_axis_layout = dict(range=[y_min, y_max], autorange=False)
        for axis_key in ('yaxis1', 'yaxis2', 'yaxis3', 'yaxis4'):
            fig['layout'][axis_key].update(y_axis_layout)

    return fig
# Box + scatter of the heart-failure percentage for the given cardiologists
fig = plotly_box_and_scatter_plot(
    df=given_cardiologists_plus_agg_util,
    column_to_graph='Percent (%) of Beneficiaries Identified With Heart Failure '
)
py.iplot(fig)
# Same column, now with the given cardiologists next to all CA cardiologists
fig = plotly_compare_distribution_of_column(
    column_to_graph='Percent (%) of Beneficiaries Identified With Heart Failure ',
    given_df=given_cardiologists_plus_agg_util,
    given_name='Nearby Cardios',
    all_df=all_cardiologists_in_ca_plus_agg_util,
    all_name='All CA Cardios'
)
py.iplot(fig)
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ]
# `display` is otherwise only imported further down in this notebook; import
# it here as well so this cell does not depend on a later cell having run.
from IPython.display import display

# Heart failure, with the y axes pinned to 0-100 so both charts are comparable
fig1 = plotly_compare_distribution_of_column(
    column_to_graph='Percent (%) of Beneficiaries Identified With Heart Failure ',
    given_name='Nearby Cardios',
    given_df=given_cardiologists_plus_agg_util,
    all_name='All CA Cardios',
    all_df=all_cardiologists_in_ca_plus_agg_util,
    y_min=0,
    y_max=100
)

# Ischemic heart disease, same fixed 0-100 range
fig2 = plotly_compare_distribution_of_column(
    column_to_graph='Percent (%) of Beneficiaries Identified With Ischemic Heart Disease ',
    given_name='Nearby Cardios',
    given_df=given_cardiologists_plus_agg_util,
    all_name='All CA Cardios',
    all_df=all_cardiologists_in_ca_plus_agg_util,
    y_min=0,
    y_max=100
)

# display() is called explicitly so both charts are shown from one cell
display(py.iplot(fig1))
display(py.iplot(fig2))
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ] This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ]
from IPython.display import display

# let's chart a bunch more interesting columns
columns_to_chart = [
    'Number of HCPCS',
    'Number of Services',
    'Number of Medicare Beneficiaries',
    'Number of Medical Services',
    'Average Age of Beneficiaries',
    'Average HCC Risk Score of Beneficiaries ',
    'Number of Beneficiaries With Medicare & Medicaid Entitlement',
]

# One comparison figure per column, each displayed explicitly from inside the loop
for column in columns_to_chart:
    fig = plotly_compare_distribution_of_column(
        column_to_graph=column,
        given_df=given_cardiologists_plus_agg_util,
        given_name='Nearby Cardios',
        all_df=all_cardiologists_in_ca_plus_agg_util,
        all_name='All CA Cardios'
    )
    display(py.iplot(fig))
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ]
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ]
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ]
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ]
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ]
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ]
This is the format of your plot grid: [ (1,1) x1,y1 ] [ (1,2) x2,y2 ] [ (1,3) x3,y3 ] [ (1,4) x4,y4 ]