From db85c41fa0fe0966d28b77255de1fa1f8b81d35e Mon Sep 17 00:00:00 2001 From: Luis Aleixo Date: Wed, 9 Nov 2022 11:55:47 +0100 Subject: [PATCH] added VE script and excel file --- .gitattributes | 1 + ..._COVID19_VE_Studies_08Sep2022_adapted.xlsx | 3 + caimira/scripts/data/vaccine_effectiveness.py | 56 +++++++++++++++++++ 3 files changed, 60 insertions(+) create mode 100644 .gitattributes create mode 100644 caimira/scripts/data/WeeklySummary_COVID19_VE_Studies_08Sep2022_adapted.xlsx create mode 100644 caimira/scripts/data/vaccine_effectiveness.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..ca21a586 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.xlsx filter=lfs diff=lfs merge=lfs -text diff --git a/caimira/scripts/data/WeeklySummary_COVID19_VE_Studies_08Sep2022_adapted.xlsx b/caimira/scripts/data/WeeklySummary_COVID19_VE_Studies_08Sep2022_adapted.xlsx new file mode 100644 index 00000000..c3ff459d --- /dev/null +++ b/caimira/scripts/data/WeeklySummary_COVID19_VE_Studies_08Sep2022_adapted.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aef0605c3668b4884b815f71c0eae93eaaa7b88d6b6c17ff97f6a86a38674a5 +size 76179581 diff --git a/caimira/scripts/data/vaccine_effectiveness.py b/caimira/scripts/data/vaccine_effectiveness.py new file mode 100644 index 00000000..8186da64 --- /dev/null +++ b/caimira/scripts/data/vaccine_effectiveness.py @@ -0,0 +1,56 @@ +import pandas as pd +from tabulate import tabulate + +''' +Script file to generate the vaccine effectiveness values. +To generate the primary vaccine effectiveness values, uncomment lines 16-21. +To generate the booster effectiveness values, uncomment lines 26-56. +''' + +# Data from 08 Sep. 
2022 +file_loc = "./WeeklySummary_COVID19_VE_Studies_08Sep2022_adapted.xlsx" + + +# ------- PRIMARY VACCINATION ------ # + +# df = pd.read_excel(file_loc, sheet_name="Primary_filtered", usecols="A, B, E") + +# calculate the VE value +# df = df.drop(df[df['VE'] < 0].index) +# ve_data = df.groupby(['vaccine'])['VE'].mean().divide(100).reset_index() +# print(tabulate(ve_data, headers='keys', tablefmt='psql')) + + +# ------- BOOSTER VACCINATION ------ # + +# df = pd.read_excel(file_loc, sheet_name="Booster_filtered", usecols="A, B, C, F") + +# # create df without the ' or ' substring in primary vaccines +# rows_with_or = df[df['primary series vaccine'].str.contains(' or ')] +# rows_indexes = list(rows_with_or.index) +# df_without_or = df.drop(labels=rows_indexes, axis=0) + +# # copy of all the rows that contain ' or ' +# new_rows_with_or = rows_with_or.reset_index().copy() + +# # create new empty dataframe +# rows_to_add = pd.DataFrame(columns=rows_with_or.columns) + +# # duplicate each row and add it into the new dataframe +# for index, row in new_rows_with_or.iterrows(): +# new_rows_with_or.at[index, 'primary series vaccine'] = row['primary series vaccine'].split(' or ')[0] +# rows_to_add.loc[index] = new_rows_with_or.loc[index] +# new_rows_with_or.at[index, 'primary series vaccine'] = row['primary series vaccine'].split(' or ')[1] +# rows_to_add.loc[len(rows_indexes)+index] = new_rows_with_or.loc[index] + +# # merge the dataframe without the ' or ' with the new dataframe that has the rows divided in two +# final_df = pd.concat([df_without_or, rows_to_add]).reset_index().drop(columns=['index']) + +# # calculate the VE value +# final_df = final_df.drop(final_df[final_df['VE'] < 0].index) + +# ve_data = final_df.groupby(['primary series vaccine', 'booster vaccine'])['VE'].mean().divide(100).reset_index() + +# result = ve_data.to_dict('records') + +# print(tabulate(ve_data, headers='keys', tablefmt='psql'))