added VE script and excel file
This commit is contained in:
parent
a239b4f1ba
commit
db85c41fa0
3 changed files with 60 additions and 0 deletions
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
*.xlsx filter=lfs diff=lfs merge=lfs -text
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2aef0605c3668b4884b815f71c0eae93eaaa7b88d6b6c17ff97f6a86a38674a5
|
||||
size 76179581
|
||||
56
caimira/scripts/data/vaccine_effectiveness.py
Normal file
56
caimira/scripts/data/vaccine_effectiveness.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import pandas as pd
|
||||
from tabulate import tabulate
|
||||
|
||||
'''
|
||||
Script file to generate the vaccine effectiveness values.
|
||||
To generate the primary vaccine effectiveness values, uncomment lines 16-21.
|
||||
To generate the booster effectiveness values, uncomment lines 26-56.
|
||||
'''
|
||||
|
||||
# Data from 08 Sep. 2022
|
||||
file_loc = "./WeeklySummary_COVID19_VE_Studies_08Sep2022_adapted.xlsx"
|
||||
|
||||
|
||||
# ------- PRIMARY VACCINATION ------ #
|
||||
|
||||
# df = pd.read_excel(file_loc, sheet_name="Primary_filtered", usecols="A, B, E")
|
||||
|
||||
# # calculate the VE value
|
||||
# df = df.drop(df[df['VE'] < 0].index)
|
||||
# ve_data = df.groupby(['vaccine'])['VE'].mean().divide(100).reset_index()
|
||||
# print(tabulate(ve_data, headers='keys', tablefmt='psql'))
|
||||
|
||||
|
||||
# ------- BOOSTER VACCINATION ------ #
|
||||
|
||||
# df = pd.read_excel(file_loc, sheet_name="Booster_filtered", usecols="A, B, C, F")
|
||||
|
||||
# # create df without the ' or ' substring in primary vaccines
|
||||
# rows_with_or = df[df['primary series vaccine'].str.contains(' or ')]
|
||||
# rows_indexes = list(rows_with_or.index)
|
||||
# df_without_or = df.drop(labels=rows_indexes, axis=0)
|
||||
|
||||
# # copy of all the rows that contain ' or '
|
||||
# new_rows_with_or = rows_with_or.reset_index().copy()
|
||||
|
||||
# # create new dataframe empty
|
||||
# rows_to_add = pd.DataFrame(columns=rows_with_or.columns)
|
||||
|
||||
# # duplicate each row and add it into the new dataframe
|
||||
# for index, row in new_rows_with_or.iterrows():
|
||||
# new_rows_with_or.at[index, 'primary series vaccine'] = row['primary series vaccine'].split(' or ')[0]
|
||||
# rows_to_add.loc[index] = new_rows_with_or.loc[index]
|
||||
# new_rows_with_or.at[index, 'primary series vaccine'] = row['primary series vaccine'].split(' or ')[1]
|
||||
# rows_to_add.loc[len(rows_indexes)+index] = new_rows_with_or.loc[index]
|
||||
|
||||
# # merge the dataframe without the ' or ' with the new dataframe that has the rows divided in two
|
||||
# final_df = pd.concat([df_without_or, rows_to_add]).reset_index().drop(columns=['index'])
|
||||
|
||||
# # calculate the VE value
|
||||
# final_df = final_df.drop(final_df[final_df['VE'] < 0].index)
|
||||
|
||||
# ve_data = final_df.groupby(['primary series vaccine', 'booster vaccine'])['VE'].mean().divide(100).reset_index()
|
||||
|
||||
# result = ve_data.to_dict('records')
|
||||
|
||||
# print(tabulate(ve_data, headers='keys', tablefmt='psql'))
|
||||
Loading…
Reference in a new issue