Deployment id: AKfycbwcTkewNGFh14BRe0JGe7zlQNexU1fnZnageLzR3BRCFb6ma8U5_IR8LyrCLCApB8Hs_g
In [ ]:
# Frequently used constants
cluster_total = 14
survey_total = 76
In [ ]:
from google.colab import auth
from google.oauth2 import service_account
from googleapiclient.discovery import build
# Authenticate and authorize using a service account
auth.authenticate_user()
In [ ]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
scope = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)
In [ ]:
spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1CatLlB4axdBW0uJRoioR-x00RsPzQ54CFYq1PqzgV7E/edit?usp=drive_web&ouid=107190373735789167396'
sheet = client.open_by_url(spreadsheet_url)
worksheet = sheet.get_worksheet(0)
data = worksheet.get_all_records()
df = pd.DataFrame(data)
df.head()
Out[ ]:
| NAME | id | GENDER | AGE | RACETHN | EDUCCAT5 | DIVISION | MARITAL_ACS | HHSIZECAT | ... | CI_LABEL_OWNGUN_GSS | CI_LABEL_SEXUALITY | CI_LABEL_HIV_STAT | CI_LABEL_PREG_STAT | CI_LABEL_CC_NUM | CI_LABEL_cc_encoded | CI_LABEL_cc_disclosed | CI_LABEL_NumChronicIllness | AGE_INT | Cluster | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Luke Walsh | 1 | Male | 25 | White non-Hispanic | Some college | Mountain | Never married | 3+ | ... | is | is | is probably | is probably | is | is probably | is possibly | is probably | 20-29 | 5 |
| 1 | 1 | Matilde Izaguirre Checa | 2 | Female | 70 | Hispanic | HS Grad | West South Central | Divorced | 1 | ... | is probably | is possibly | is | is | is | is | is | is possibly | 70-79 | 11 |
| 2 | 2 | Ryan Smith | 3 | Male | 85 | White non-Hispanic | Less than HS | Middle Atlantic | Now married | 2 | ... | is possibly | is probably | is probably | is | is probably | is probably | is probably | is probably | 80-89 | 2 |
| 3 | 3 | Matthew Grimes | 4 | Male | 59 | White non-Hispanic | HS Grad | Mountain | Now married | 2 | ... | is probably | is probably | is probably | is | is possibly | is probably | is | is probably | 50-59 | 12 |
| 4 | 4 | Miraan Rama | 5 | Female | 19 | Asian | Some college | Pacific | Never married | 1 | ... | is probably | is | is probably | is | is probably | is probably | is probably | is | 10-19 | 9 |
5 rows × 187 columns
In [ ]:
# Mute it after creating the representative data set
'''
# Select 152 profiles from each cluster evenly
import pandas as pd
import itertools
from googleapiclient.discovery import build
selected_data = []
for cluster_id in range(cluster_total):
cluster_data = df[df['Cluster'] == cluster_id]
selected_cluster_data = []
if len(selected_cluster_data) < 152:
selected_cluster_data += cluster_data.sample(n=152 - len(selected_cluster_data), random_state=42).to_dict('records')
selected_data.extend(selected_cluster_data)
selected_df = pd.DataFrame(selected_data)
'''
In [ ]:
for cluster_id in range(cluster_total):
cluster_data = df[df['Cluster'] == cluster_id]
# Separate out subsets of interest
hiv_positive_data = cluster_data[cluster_data['HIV_STAT'] == 'positive']
pregnant_data = cluster_data[cluster_data['PREG_STAT'] == 'Pregnant']
# Start building our selection for this cluster in a list
selection_list = []
# 1) If HIV+ data exists, pick at least 1 row for sure
hiv_count = min(4, len(hiv_positive_data)) # up to 4
if hiv_count > 0:
selected_hiv = hiv_positive_data.sample(n=hiv_count, random_state=42)
selection_list.append(selected_hiv)
# 2) If pregnant data exists, pick at least 1 row for sure
pregnant_count = min(4, len(pregnant_data))
if pregnant_count > 0:
selected_pregnant = pregnant_data.sample(n=pregnant_count, random_state=43)
# Only add if not the same row (in case one row is both HIV+ & Pregnant):
if not selected_pregnant.index.isin(selected_hiv.index).all():
selection_list.append(selected_pregnant)
# Only add if it's not the exact same row as the HIV+ one selected
# (In case the same participant is both HIV+ and Pregnant)
if not selected_pregnant.index.isin(selected_hiv.index).all():
selection_list.append(selected_pregnant)
# Convert the forced picks into one DataFrame
forced_picks = pd.concat(selection_list).drop_duplicates()
remaining_needed = 152 - forced_picks.shape[0]
if remaining_needed < 0:
remaining_needed = 0
# Exclude forced_picks from the random pool
leftover_pool = cluster_data.drop(forced_picks.index)
if leftover_pool.shape[0] < remaining_needed:
print(f"Not enough rows in cluster {cluster_id} to meet the 152-row requirement.")
selected_cluster_data = pd.concat([forced_picks, leftover_pool])
else:
selected_cluster_data = pd.concat([
forced_picks,
leftover_pool.sample(n=remaining_needed, random_state=44)
])
# Finally extend your overall selection
selected_data.extend(selected_cluster_data.to_dict('records'))
In [ ]:
#see all HIV positive people
selected_df[selected_df['PREG_STAT'] == 'Negative']
Out[ ]:
| NAME | id | GENDER | AGE | RACETHN | EDUCCAT5 | DIVISION | MARITAL_ACS | HHSIZECAT | ... | CI_LABEL_OWNGUN_GSS | CI_LABEL_SEXUALITY | CI_LABEL_HIV_STAT | CI_LABEL_PREG_STAT | CI_LABEL_CC_NUM | CI_LABEL_cc_encoded | CI_LABEL_cc_disclosed | CI_LABEL_NumChronicIllness | AGE_INT | Cluster | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3413 | Karen Campbell | 3414 | Female | 85 | White non-Hispanic | HS Grad | East South Central | Widowed | 1 | ... | is probably | is probably | is probably | is probably | is probably | is probably | is probably | is possibly | 80-89 | 0 |
| 1 | 10657 | Lori Delacruz | 10658 | Female | 80 | White non-Hispanic | HS Grad | South Atlantic | Widowed | 1 | ... | is possibly | is | is probably | is | is probably | is probably | is probably | is probably | 80-89 | 0 |
| 2 | 1669 | Shannon Mcdonald | 1670 | Female | 80 | White non-Hispanic | Some college | South Atlantic | Widowed | 2 | ... | is probably | is | is possibly | is probably | is probably | is | is probably | is | 80-89 | 0 |
| 4 | 2285 | Jennifer Phillips | 2286 | Female | 80 | White non-Hispanic | HS Grad | South Atlantic | Divorced | 2 | ... | is probably | is possibly | is probably | is probably | is possibly | is probably | is possibly | is probably | 80-89 | 0 |
| 6 | 7536 | Alondra Benavente Sales | 7537 | Female | 42 | Hispanic | Some college | South Atlantic | Now married | 3+ | ... | is probably | is probably | is | is probably | is possibly | is | is | is possibly | 40-49 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2122 | 5102 | Kathleen Chaney | 5103 | Female | 20 | White non-Hispanic | Some college | New England | Never married | 3+ | ... | is | is probably | is probably | is | is | is | is probably | is probably | 20-29 | 13 |
| 2123 | 12570 | Heather Anderson | 12571 | Female | 26 | White non-Hispanic | College grad | Middle Atlantic | Never married | 1 | ... | is probably | is probably | is | is probably | is possibly | is | is probably | is probably | 20-29 | 13 |
| 2125 | 4852 | Lisa Lang | 4853 | Female | 24 | White non-Hispanic | HS Grad | West North Central | Never married | 3+ | ... | is probably | is unlikely but might be | is probably | is possibly | is probably | is | is probably | is | 20-29 | 13 |
| 2126 | 13247 | Kayla Mcdonald | 13248 | Female | 21 | White non-Hispanic | Some college | South Atlantic | Never married | 3+ | ... | is | is probably | is unlikely but might be | is possibly | is | is probably | is probably | is possibly | 20-29 | 13 |
| 2127 | 14580 | Cindy Hodges | 14581 | Female | 24 | White non-Hispanic | Some college | Pacific | Never married | 3+ | ... | is possibly | is probably | is | is unlikely but might be | is possibly | is | is | is probably | 20-29 | 13 |
1004 rows × 187 columns
In [ ]:
# Mute it after creating the representative sheet
'''
new_sheet_title = 'representative_set'
new_spreadsheet = client.create(new_sheet_title)
new_worksheet = new_spreadsheet.get_worksheet(0)
new_worksheet.update_title('selected_data')
new_worksheet.update([selected_df.columns.values.tolist()] + selected_df.values.tolist())
spreadsheet_id = new_spreadsheet.id
drive_service = build('drive', 'v3', credentials=creds)
# Move the spreadsheet to the folder
try:
drive_service.files().update(
fileId=spreadsheet_id,
addParents='1uJa_ZyNHqZn-YBju4kRre0HBfbcUWCSQ',
removeParents='root',
fields='id, parents'
).execute()
print(f"New sheet '{new_sheet_title}' created and moved to the folder.")
except Exception as e:
print(f"An error occurred while moving the file: {e}")
'''
New sheet 'representative_set' created and moved to the folder.
In [ ]:
# Added: representative sheet access
rpt_sheet_url = 'https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=0#gid=0'
rpt_sheet = client.open_by_url(rpt_sheet_url)
worksheet = rpt_sheet.get_worksheet(0)
data = worksheet.get_all_records()
df = pd.DataFrame(data)
df.head()
Out[ ]:
| NAME | id | GENDER | AGE | RACETHN | EDUCCAT5 | DIVISION | MARITAL_ACS | HHSIZECAT | ... | CI_LABEL_OWNGUN_GSS | CI_LABEL_SEXUALITY | CI_LABEL_HIV_STAT | CI_LABEL_PREG_STAT | CI_LABEL_CC_NUM | CI_LABEL_cc_encoded | CI_LABEL_cc_disclosed | CI_LABEL_NumChronicIllness | AGE_INT | Cluster | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3413 | Karen Campbell | 3414 | Female | 85 | White non-Hispanic | HS Grad | East South Central | Widowed | 1 | ... | is probably | is probably | is probably | is probably | is probably | is probably | is probably | is possibly | 80-89 | 0 |
| 1 | 10657 | Lori Delacruz | 10658 | Female | 80 | White non-Hispanic | HS Grad | South Atlantic | Widowed | 1 | ... | is possibly | is | is probably | is | is probably | is probably | is probably | is probably | 80-89 | 0 |
| 2 | 1669 | Shannon Mcdonald | 1670 | Female | 80 | White non-Hispanic | Some college | South Atlantic | Widowed | 2 | ... | is probably | is | is possibly | is probably | is probably | is | is probably | is | 80-89 | 0 |
| 3 | 15793 | Albina Cobo Manzano | 15794 | Female | 36 | Hispanic | College grad | Pacific | Now married | 2 | ... | is | is probably | is probably | is probably | is | is probably | is probably | is | 30-39 | 0 |
| 4 | 2285 | Jennifer Phillips | 2286 | Female | 80 | White non-Hispanic | HS Grad | South Atlantic | Divorced | 2 | ... | is probably | is possibly | is probably | is probably | is possibly | is probably | is possibly | is probably | 80-89 | 0 |
5 rows × 187 columns
In [ ]:
from google.colab import drive
drive.mount('/content/drive')
# Initialize a set to store unique IDs
unique_ids = set()
Mounted at /content/drive
In [ ]:
import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import random
selected_ids = []
In [ ]:
# Mute it after creating worksheets
'''
# Added: Create 76 surveys, with each containing profiles of different clusters
import time
main_worksheet = rpt_sheet.worksheet("selected_data")
all_data = main_worksheet.get_all_records()
df = pd.DataFrame(all_data)
selected_indices = set()
for survey_num in range(survey_total):
survey_sheet_name = f"Quiz_{survey_num + 1}"
survey_data = []
try:
try:
survey_worksheet = rpt_sheet.worksheet(survey_sheet_name)
survey_worksheet.clear()
except gspread.exceptions.WorksheetNotFound:
rpt_sheet.add_worksheet(title=survey_sheet_name, rows="15", cols="20")
survey_worksheet = rpt_sheet.worksheet(survey_sheet_name)
survey_data = []
for cluster_id in range(cluster_total):
cluster_data = df[df['Cluster'] == cluster_id]
available_rows = cluster_data[~cluster_data.index.isin(selected_indices)]
if len(available_rows) >= 2:
selected_rows = available_rows.sample(n=2)
survey_data.append(selected_rows)
selected_indices.update(selected_rows.index)
elif not available_rows.empty:
selected_row = available_rows.sample(n=1)
survey_data.append(selected_row)
selected_indices.add(selected_row.index[0])
header_row = df.columns.tolist()
all_sheet_data = [header_row] + [list(row) for row in pd.concat(survey_data).itertuples(index=False, name=None)]
survey_worksheet.update('A1', all_sheet_data)
except gspread.exceptions.APIError as e:
print(f"API Error for sheet {survey_sheet_name}: {e}")
# Add program delay to avoid exceeding API limit
time.sleep(5)
continue
except Exception as e:
print(f"Unexpected error for sheet {survey_sheet_name}: {e}")
break
# Program delay
time.sleep(2)
print("Worksheet created.")
'''
<ipython-input-23-6fb946d17020>:41: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
survey_worksheet.update('A1', all_sheet_data)
Worksheet created.
In [ ]:
import time
import gspread
import pandas as pd
initial_information = ["NAME", "id"]
base_features = [
"GENDER", "RACETHN", "EDUCCAT5", "DIVISION", "MARITAL_ACS",
"CHILDRENCAT", "CITIZEN_REC", "BORN_ACS", "AGE_INT"
]
health_features = ["HIV_STAT", "PREG_STAT", "NumChronicIllness"]
finance_features = ["FAMINC5", "CC_NUM", "FDSTMP_CPS"]
sensitive_features = ["SEXUALITY", "OWNGUN_GSS", "RELIGCAT"]
def delete_unnecessary_columns(sheet_name, visible_columns, condition_label):
worksheet = rpt_sheet.worksheet(sheet_name)
data = worksheet.get_all_records()
df = pd.DataFrame(data)
df['Condition'] = condition_label
if 'Condition' not in visible_columns:
visible_columns.append('Condition')
columns_to_keep = [col for col in visible_columns if col in df.columns]
if columns_to_keep:
print(f"Sheet '{sheet_name}' - Unnecessary columns discovered and will be deleted.")
df = df[columns_to_keep]
worksheet.clear()
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
else:
print(f"Sheet '{sheet_name}' - Unnecessary columns may have already been deleted.")
for survey_num in range(survey_total):
survey_sheet_name = f"Quiz_{survey_num + 1}"
if survey_num <= 18:
condition_label = "Control"
visible_columns = initial_information + base_features
elif survey_num <= 37:
condition_label = "Health"
visible_columns = initial_information + base_features + health_features
elif survey_num <= 56:
condition_label = "Finance"
visible_columns = initial_information + base_features + finance_features
else:
condition_label = "Sensitive"
visible_columns = initial_information + base_features + sensitive_features
try:
delete_unnecessary_columns(survey_sheet_name, visible_columns, condition_label)
time.sleep(2)
except gspread.exceptions.APIError as e:
if 'RATE_LIMIT_EXCEEDED' in str(e):
print("Rate limit exceeded. Waiting for 5 seconds before retrying...")
time.sleep(5)
delete_unnecessary_columns(survey_sheet_name, visible_columns, condition_label)
else:
print(f"Unexpected error while processing {survey_sheet_name}: {e}")
break
print("Necessary columns have been selected.")
Sheet 'Quiz_1' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_2' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_3' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_4' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_5' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_6' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_7' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_8' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_9' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_10' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_11' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_12' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_13' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_14' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_15' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_16' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_17' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_18' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_19' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_20' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_21' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_22' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_23' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_24' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_25' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_26' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_27' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_28' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_29' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_30' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_31' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_32' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_33' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_34' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_35' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_36' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_37' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_38' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_39' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_40' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_41' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_42' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_43' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_44' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_45' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_46' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_47' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_48' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_49' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_50' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_51' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_52' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_53' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_54' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_55' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_56' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_57' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_58' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_59' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_60' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_61' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_62' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_63' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_64' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_65' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_66' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_67' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_68' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_69' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_70' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_71' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_72' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_73' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_74' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_75' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Sheet 'Quiz_76' - Unnecessary columns discovered and will be deleted.
<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
Necessary columns have been selected.
In [ ]:
# Add corresponding conditions of each profile in every survey within representative data set to the last column
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import time
scope = ['https://www.googleapis.com/auth/spreadsheets',
'https://www.googleapis.com/auth/drive.file',
'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)
rpt_sheet_url = 'https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=352691556#gid=352691556'
sheet = client.open_by_url(rpt_sheet_url)
sheet_names = [f"Quiz_{i}" for i in range(1, 41)]
for sheet_name in sheet_names:
worksheet = sheet.worksheet(sheet_name)
data = worksheet.get_all_records()
df = pd.DataFrame(data)
if 'Condition' not in df.columns:
if 1 <= int(sheet_name.split('_')[1]) <= 10:
df['Condition'] = 'Base'
elif 11 <= int(sheet_name.split('_')[1]) <= 20:
df['Condition'] = 'Base + Health'
elif 21 <= int(sheet_name.split('_')[1]) <= 30:
df['Condition'] = 'Base + Finance'
elif 31 <= int(sheet_name.split('_')[1]) <= 40:
df['Condition'] = 'Base + Sensitive'
header = df.columns.tolist()
worksheet.update([header] + df.values.tolist())
print(f"Updated {sheet_name} with 'Condition' column.")
time.sleep(2)
print("Conditions have been added to each profile in every survey.")
In [ ]:
# Integrating each profile and condition to another sheet called integration.
# It moves values intermittently to minimize the probability of API runtime error occurrence.
import time
import gspread
from oauth2client.service_account import ServiceAccountCredentials
scope = ['https://www.googleapis.com/auth/spreadsheets',
'https://www.googleapis.com/auth/drive.file',
'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)
rpt_sheet_url = 'https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=352691556#gid=352691556'
spreadsheet = client.open_by_url(rpt_sheet_url)
try:
integration_sheet = spreadsheet.add_worksheet(title="integration", rows="1000", cols="26") # Default 26 columns for flexibility
except Exception as e:
integration_sheet = spreadsheet.worksheet("integration")
integration_sheet.update("A1:B1000", [[""]] * 1000)
integration_sheet.update('A1', [["id", "Condition"]])
row_index = 2
for i in range(66, 77): # If API runtime error occurs, delete the incomplete area and adjust the range
sheet_name = f"Quiz_{i}"
quiz_sheet = spreadsheet.worksheet(sheet_name)
data = quiz_sheet.get_all_values()[1:]
rows_to_add = []
for row in data:
rows_to_add.append([row[1], row[-1]])
cell_range = f"A{row_index}:B{row_index + len(rows_to_add) - 1}"
try:
integration_sheet.update(cell_range, rows_to_add)
except Exception as e:
time.sleep(5)
integration_sheet.update(cell_range, rows_to_add)
row_index += len(rows_to_add)
time.sleep(1)
<ipython-input-31-4840b0e9d117>:21: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
integration_sheet.update("A1:B1000", [[""]] * 1000)
<ipython-input-31-4840b0e9d117>:22: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
integration_sheet.update('A1', [["id", "Condition"]])
<ipython-input-31-4840b0e9d117>:35: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
integration_sheet.update(cell_range, rows_to_add)
In [ ]:
# Add corresponding conditions of each profile in Sheet 3 of update counter file to the last column
# Changing less than 5 surveys each time running can minimize API error probability
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
scope = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)
rpt_sheet_url = 'https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=0#gid=0'
update_sheet_url = 'https://docs.google.com/spreadsheets/d/1OSoeG07uhYDsYEUP38Rc6bMwJySvflqGWfryTjx1gIo/edit?gid=2023416044#gid=2023416044'
rpt_sheet = client.open_by_url(rpt_sheet_url)
update_sheet = client.open_by_url(update_sheet_url).worksheet("Sheet4")
integration_sheet = rpt_sheet.worksheet("integration")
integration_data = integration_sheet.get_all_values()[1:]
integration_dict = {str(row[0]): row[1] for row in integration_data}
sheet3_data = update_sheet.get_all_values()
header = sheet3_data[0]
rows = sheet3_data[1:]
if 'Condition' not in header:
header.append('Condition')
update_sheet.update('A1', [header])
updated_rows = []
for row in rows:
person_id = str(row[0])
condition = integration_dict.get(person_id, 'Unknown')
if len(row) < len(header):
row.append(condition)
else:
row[-1] = condition
updated_rows.append(row)
update_sheet.update(f"A2:{chr(64 + len(header))}{len(rows) + 1}", updated_rows)
print("Conditions have been added to the Sheet4 of update counter table.")
<ipython-input-4-9c58a3e09918>:28: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
update_sheet.update('A1', [header])
<ipython-input-4-9c58a3e09918>:40: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
update_sheet.update(f"A2:{chr(64 + len(header))}{len(rows) + 1}", updated_rows)
Conditions have been added to the Sheet4 of update counter table.
In [ ]:
def create_description(data_row):
# Basic features
name = data_row['NAME']
id = data_row['id']
gender = data_row['GENDER'].lower()
if gender == 'male':
pronoun = 'He'
pronoun2 = 'His'
else:
pronoun = 'She'
pronoun2 = 'Her'
racethn = data_row['RACETHN']
if racethn == "Other race":
racethn = "unidentified"
educcat5 = data_row['EDUCCAT5'].lower()
if educcat5 == 'less than hs':
educcat5 = 'a less than high school'
elif educcat5 == 'hs grad':
educcat5 = 'a high school graduate'
elif educcat5 == 'some college':
educcat5 = 'some college'
elif educcat5 == 'College grad':
educcat5 = 'a college graduate'
else:
educcat5 = 'a postgraduate'
marital_acs = data_row['MARITAL_ACS'].lower()
if marital_acs == 'never married':
marital_acs = 'has never married'
elif marital_acs == 'now married':
marital_acs = 'is now married'
else:
marital_acs = 'is divorced'
childrencat = data_row['CHILDRENCAT'].lower()
if childrencat == "no children":
childrencat = "doesn't have"
else:
childrencat = "has"
born_acs = data_row['BORN_ACS']
if born_acs == 'Inside the United States':
born_acs = 'inside the U.S.'
else:
born_acs = 'outside the U.S.'
age_int = data_row['AGE_INT']
citizen_rec = data_row['CITIZEN_REC'].lower()
if citizen_rec == 'Yes, a U.S. citizen':
citizen_rec = "US citizen"
else:
citizen_rec = "non-US citizen"
division = data_row['DIVISION']
division_to_common_name = {
'Pacific': 'the West Coast',
'Mountain': 'the Mountain States',
'West North Central': 'the Upper Midwest',
'West South Central': 'the South Central',
'East North Central': 'the Great Lakes region',
'East South Central': 'the Deep South',
'South Atlantic': 'the South',
'Middle Atlantic': 'the Northeast',
'New England': 'New England'
}
if division in division_to_common_name:
division = division_to_common_name[division]
else:
division = ""
base_description = (
f"{name} has the following attributes:\r"
f"• {pronoun2} age is between {age_int} years.\r"
f"• {pronoun} is of {racethn} descent.\r"
f"• {pronoun2} gender is {gender}.\r"
f"• {pronoun} has {educcat5} level education.\r"
f"• {pronoun} {marital_acs}.\r"
f"• {pronoun} {childrencat} children.\r"
f"• {pronoun} is a {citizen_rec}.\r"
f"• {pronoun} was born {born_acs}\r"
f"• {pronoun} resides in {division} region of the U.S.\r"
)
# Health features
hiv = data_row.get('HIV_STAT', None)
if hiv:
hiv = hiv.lower()
pregnancy = data_row.get('PREG_STAT', None)
if pregnancy:
if gender == 'male':
pregnancy = ""
else:
pregnancy = data_row.get('PREG_STAT', None)
if pregnancy == 'Positive':
pregnancy = "• She is pregnant.\r"
else:
pregnancy = "• She is not pregnant.\r"
num_chronic_illness = data_row.get('NumChronicIllness', None)
if isinstance(num_chronic_illness, (int, float)):
num_chronic_illness = str(num_chronic_illness)
num_chronic_illness = num_chronic_illness.lower()
if num_chronic_illness == '0':
chronill = 'not chronically ill'
else:
chronill = 'chronically ill'
if hiv and num_chronic_illness:
health_description = (
f"• {pronoun2} HIV status is {hiv}.\r"
f"{pregnancy}"
f"• {pronoun} is {chronill}.\r"
)
else:
health_description = ""
# finance features
faminc5 = data_row.get('FAMINC5', None)
if faminc5:
faminc5.lower()
if faminc5 == '$20K to less than $40K':
faminc5 = 'between $20K to less than $40K'
elif faminc5 == '$40K to less than $75K':
faminc5 = 'between $40K to less than $75K'
elif faminc5 == '$75K to less than $150K':
faminc5 = 'between $75K to less than $150K'
else:
None
cred = data_row.get('CC_NUM', None)
if cred:
if isinstance(cred, (int, float)):
cred = str(cred)
cred = cred.lower()
if cred != 0:
cred = "has"
else:
cred = "does not have"
fdstmp_cps = data_row.get('FDSTMP_CPS', None)
if fdstmp_cps == 'yes':
fdstmp_cps = "currently receives foodstamps"
else:
fdstmp_cps = "does not currently receive foodstamps"
if faminc5 and cred and fdstmp_cps:
finance_description = (
f"• {pronoun2} family's annual income is {faminc5}.\r"
f"• {pronoun} {cred} a credit card.\r"
f"• {pronoun} {fdstmp_cps}.\r"
)
else:
finance_description = ""
# sensitive features
sexuality = data_row.get('SEXUALITY', None)
if sexuality:
sexuality = sexuality.lower()
owngun_gss = data_row.get('OWNGUN_GSS', None)
if owngun_gss == 'Yes':
owngun_gss = "owns a gun"
else:
owngun_gss = "doesn't own a gun"
religcat = data_row.get('RELIGCAT', None)
if sexuality and religcat and owngun_gss:
sensitive_description = (
f"• {pronoun} is {sexuality}.\r"
f"• {pronoun} {owngun_gss}.\r"
f"• {pronoun} is {religcat}.\r"
)
else:
sensitive_description = ""
description = base_description + health_description + finance_description + sensitive_description
return description
In [ ]:
# Mute it after creating forms and URLs
'''
from google.oauth2 import service_account
from googleapiclient.discovery import build
import gspread
# Authentication and service setup
service_account_key_path = 'credentials.json'
credentials = service_account.Credentials.from_service_account_file(
service_account_key_path,
scopes=['https://www.googleapis.com/auth/drive', 'https://www.googleapis.com/auth/spreadsheets']
)
client = gspread.authorize(credentials)
forms_service = build("forms", "v1", credentials=credentials)
# Open the source and destination spreadsheets
source_spreadsheet = client.open_by_url('https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=0#gid=0')
destination_spreadsheet = client.open_by_url('https://docs.google.com/spreadsheets/d/12fMUJYC8ZpayamsyzzpcRK8XJLE2WU-spBYLOTr-jBs/edit?gid=0#gid=0')
summary_worksheet = destination_spreadsheet.sheet1
# Prepare the destination worksheet
summary_worksheet.update('A1:B1', [["Quiz Name", "Form URL"]])
# Iterate through each survey, create forms, and log URLs
for i, worksheet in enumerate(source_spreadsheet.worksheets()[1:]):
survey_name = worksheet.title
form_body = {"info": {"title": f"Harm Scores Survey: {survey_name}"}}
result = forms_service.forms().create(body=form_body).execute()
form_id = result["formId"]
form_url = f"https://docs.google.com/forms/d/{form_id}/edit"
summary_worksheet.append_row([survey_name, form_url])
print("All surveys and form URLs have been logged.")
'''
<ipython-input-36-32025a29155f>:22: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
summary_worksheet.update('A1:B1', [["Quiz Name", "Form URL"]])
All surveys and form URLs have been logged.
In [ ]:
import re
import pandas as pd
import time
summary_worksheet = destination_spreadsheet.worksheet('Sheet1')
form_urls_records = summary_worksheet.get_all_records()
for record in form_urls_records:
quiz_name = record['Quiz Name']
form_url = record['Form URL']
form_id_match = re.search(r'/d/(.*?)/edit', form_url)
if form_id_match:
form_id = form_id_match.group(1)
matching_worksheet = source_spreadsheet.worksheet(quiz_name)
# Assuming each row in the worksheet is a person's information
person_records = matching_worksheet.get_all_records()
if not person_records:
print(f"No data found in the worksheet for quiz: {quiz_name}")
continue
df_worksheet = pd.DataFrame(person_records)
df_worksheet['Description'] = ''
for index, row in df_worksheet.iterrows():
full_description = create_description(row)
df_worksheet.at[index, 'Description'] = full_description
update_requests = []
first_request = {
"createItem": {
"item": {
"textItem": {},
"title": f"We are trying to quantify harm from the release of personal information. Please answer each question to the best of your abilities. For each subject, read the entire profile—sometimes, the harm might (or might not) be context-dependent.\
\r\rAll of the people included in this survey are fake (synthetic)—we are not showing you real data about real people. This study is IRB approved."
},
"location": {"index": 0},
},
}
update_requests.append(first_request)
break_request = {
"createItem": {
"item": {
"pageBreakItem": {},
},
"location": {"index": 1},
},
}
update_requests.append(break_request)
prolific_id = {
"createItem": {
"item": {
"title": "What is your Prolific ID?",
"questionItem": {
"question": {
"required": True,
"textQuestion": {
},
},
},
},
"location": {"index": 2},
},
}
update_requests.append(prolific_id)
break_request = {
"createItem": {
"item": {
"pageBreakItem": {},
},
"location": {"index": 3},
},
}
update_requests.append(break_request)
question_number = 1
location_index = 4
row_count = 0
for index, row in df_worksheet.iterrows():
row_count += 1
description_value = row['Description']
id = row['id']
description_request = {
"createItem": {
"item": {
"textItem": {},
"title": f"Sample person {question_number}\r\r{description_value}"
},
"location": {"index": location_index},
},
}
update_requests.append(description_request)
question_number += 1
location_index += 1
attention_check = {
"createItem": {
"item": {
"title": "What is this person's ID number? (Please select the given answer; this question is purely for data processing purposes.)",
"questionItem": {
"question": {
"required": True,
"choiceQuestion": {
"type": "RADIO",
"options": [
{"value": f"{id}"},
],
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(attention_check)
location_index += 1
if row_count == 10:
attention_check2 = {
"createItem": {
"item": {
"title": "What do you hope elected leaders in Washington do in the next year? This is an attention check question.",
"questionItem": {
"question": {
"required": True,
"textQuestion": {
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(attention_check2)
location_index += 1
else:
attention_check2 = {}
if row_count == 20:
attention_check3 = {
"createItem": {
"item": {
"title": "To ensure that you have read this question carefully, please type “Green” as your answer. This is an attention check question.",
"questionItem": {
"question": {
"required": True,
"textQuestion": {
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(attention_check3)
location_index += 1
else:
attention_check3 = {}
name = row['NAME']
gender = row['GENDER'].lower()
if gender == 'male':
pronoun2 = 'his'
else:
pronoun2 = 'her'
q1_request = {
"createItem": {
"item": {
"title": f"How harmful do you think most people in the U.S. would consider it if hackers or cybercriminals stole {name}'s personal data in a data breach?",
"questionItem": {
"question": {
"required": True,
"choiceQuestion": {
"type": "RADIO",
"options": [
{"value": 'Not at all harmful'},
{"value": 'Slightly harmful'},
{"value": 'Moderately harmful'},
{"value": 'Very harmful'},
{"value": 'Extremely harmful'},
],
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(q1_request)
location_index += 1
q2_request = {
"createItem": {
"item": {
"title": f"How harmful do you think most people in the U.S. would consider it if government agencies accessed {name}'s personal information without {pronoun2} knowledge or consent?",
"questionItem": {
"question": {
"required": True,
"choiceQuestion": {
"type": "RADIO",
"options": [
{"value": 'Not at all harmful'},
{"value": 'Slightly harmful'},
{"value": 'Moderately harmful'},
{"value": 'Very harmful'},
{"value": 'Extremely harmful'},
],
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(q2_request)
location_index += 1
q3_request = {
"createItem": {
"item": {
"title": f"How harmful do you think most people in the U.S. would consider it if a corporation collected and used {name}'s personal data for {pronoun2} own purposes without {pronoun2} consent?",
"questionItem": {
"question": {
"required": True,
"choiceQuestion": {
"type": "RADIO",
"options": [
{"value": 'Not at all harmful'},
{"value": 'Slightly harmful'},
{"value": 'Moderately harmful'},
{"value": 'Very harmful'},
{"value": 'Extremely harmful'},
],
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(q3_request)
location_index += 1
q4_request = {
"createItem": {
"item": {
"title": f"How harmful do you think most people in the U.S. would consider it if {name}'s employer or colleagues viewed {pronoun2} personal data without {pronoun2} permission?",
"questionItem": {
"question": {
"required": True,
"choiceQuestion": {
"type": "RADIO",
"options": [
{"value": 'Not at all harmful'},
{"value": 'Slightly harmful'},
{"value": 'Moderately harmful'},
{"value": 'Very harmful'},
{"value": 'Extremely harmful'},
],
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(q4_request)
location_index += 1
q5_request = {
"createItem": {
"item": {
"title": f"How harmful do you think most people in the U.S. would consider it if {name}'s personal information was inadvertently shared with {pronoun2} family members without {pronoun2} consent?",
"questionItem": {
"question": {
"required": True,
"choiceQuestion": {
"type": "RADIO",
"options": [
{"value": 'Not at all harmful'},
{"value": 'Slightly harmful'},
{"value": 'Moderately harmful'},
{"value": 'Very harmful'},
{"value": 'Extremely harmful'},
],
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(q5_request)
location_index += 1
q6_request = {
"createItem": {
"item": {
"title": f"How harmful do you think most people in the U.S. would consider it if personal details about {name} were disclosed to {pronoun2} close friends without {pronoun2} knowledge?",
"questionItem": {
"question": {
"required": True,
"choiceQuestion": {
"type": "RADIO",
"options": [
{"value": 'Not at all harmful'},
{"value": 'Slightly harmful'},
{"value": 'Moderately harmful'},
{"value": 'Very harmful'},
{"value": 'Extremely harmful'},
],
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(q6_request)
location_index += 1
q7_request = {
"createItem": {
"item": {
"title": f"How harmful do you think most people in the U.S. would consider it if {name}'s personal data was accessed by acquaintances who know {name} by face and name but don't interact with {name} regularly?",
"questionItem": {
"question": {
"required": True,
"choiceQuestion": {
"type": "RADIO",
"options": [
{"value": 'Not at all harmful'},
{"value": 'Slightly harmful'},
{"value": 'Moderately harmful'},
{"value": 'Very harmful'},
{"value": 'Extremely harmful'},
],
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(q7_request)
location_index += 1
q8_request = {
"createItem": {
"item": {
"title": f"How harmful do you think most people in the U.S. would consider it if {name}'s personal information was made publicly available on the internet for anyone to access without {pronoun2} consent?",
"questionItem": {
"question": {
"required": True,
"choiceQuestion": {
"type": "RADIO",
"options": [
{"value": 'Not at all harmful'},
{"value": 'Slightly harmful'},
{"value": 'Moderately harmful'},
{"value": 'Very harmful'},
{"value": 'Extremely harmful'},
],
},
},
},
},
"location": {"index": location_index},
},
}
update_requests.append(q8_request)
location_index += 1
break_request = {
"createItem": {
"item": {
"pageBreakItem": {},
},
"location": {"index": location_index},
},
}
update_requests.append(break_request)
location_index += 1
update = {"requests": update_requests}
last_request = {
"createItem": {
"item": {
"textItem": {},
"title": f"Prolific code: COLYGFMB"
},
"location": {"index": location_index},
},
}
update_requests.append(last_request)
location_index += 1
try:
t = "Harm Scores Survey"
forms_service.forms().batchUpdate(formId=form_id, body={"requests": update_requests}).execute()
drive_service = build('drive', 'v3', credentials=credentials)
# Temporarily muted
drive_service.permissions().create(
fileId=form_id,
body={
'role': 'writer',
'type': 'user',
'emailAddress': 'shreya.kochar1@gmail.com'
}
).execute()
print(f"Form updated successfully: {form_id}")
except Exception as e:
print(f"Unable to update form {form_id}: {e}")
else:
print(f"Unable to extract form ID from URL: {form_url}")
# Program delay
time.sleep(2)
Form updated successfully: 1Wrfzzm8GeDNKBQsEH4h2UUtzRzhSmj1c02aQaJkBI6s Form updated successfully: 17seIXv0sOCcGT2s4Ah2mkkL086vRWVaf6kaXqXl-Dzw Form updated successfully: 1JHYAiMRe_gLQyNFn4U22cA0WuuBTFLfsvAKMgSmoLXg Form updated successfully: 1hTA4DRe7s89xwvADnZLNti814zsC0K-PgLB4n8GoUpI Form updated successfully: 1EGnzEvl-Y6oDOUlX1sLYjLjK3QB6DVM5NrlwfAD9l2w Form updated successfully: 1Tvj4A_kbEieqUSaLEAPSXyijQKp-RyDQFqDIiaya1OE Form updated successfully: 1nq3rp6okjDp6-ykr3Cb4Sr-eAzlXJ2bZUMJJrpULUbg Form updated successfully: 1KWFeBj4yxaSSXhnUsoj38UJMx-4qgHclvhNn2JmdaaE Form updated successfully: 1aJ2IgCSh3pajXpxPOeXSfpJ-gnYcH2RAZxT9XskRC54 Form updated successfully: 1Nipu71J8H9dKOfFAnGmT18MFV_hQBxXMjkbD_PgJY3Y Form updated successfully: 113ApJyceab8b6rGq0aoQXDeF7zWUHKMT_J41PFGlfvs Form updated successfully: 1Pp6X8sQgno5JAlboTJcim7qzgQ7CuK5K2STIQuuf2-s Form updated successfully: 16dzWs1GBFdESLi9dI2yx6siITik0JbrRKwdD9LIfMbs Form updated successfully: 1Y6mKTuHwq436qryhsQzhJTErOclcRpP2eFsaGA8c6e4 Form updated successfully: 17VT1pAC2suPtlLm0KzxUI9W_Re8nX4AzMKuakYLGAR0 Form updated successfully: 1jBINeal587U1WvF7McXOU6DE0SFEcLfQmql5okX3KRA Form updated successfully: 1S5DBWZIsenzzlR8oCmCXI7Z4ewWafCgfA6rwllsb_24 Form updated successfully: 1bIBjipXlppgfcqJ085Fz1zoBANJh_50uw_G2FRHj5AM Form updated successfully: 1CRLdS9tFu1qRWhJORYRlq9_rygBlqAvpKFSaXnQMMyY Form updated successfully: 168x9HBGV_n7dy6WFdJxv4fYtv5NHkMG7VXJZmCdQQW0 Form updated successfully: 1ataLuMJynbE0rCed_ozMFwX-mF4MPjpBr0EUyhi_WGU Form updated successfully: 1utzIm5L1MSH8S98y7xjZffgFk0-qxUJEfeBEqUsVDpU Form updated successfully: 1xZVOjUN87a4grVetzXs61Glg0PhCbY1hDae_GjnIFeI Form updated successfully: 1pocKFVmAHDqYVvpww3Xan8eK0Af087AQVTX4Gcdf2P0 Form updated successfully: 1gcx-4wyNLQr8samszcZwcG8zqwHGgv3oZAYJ6Ewzz3A Form updated successfully: 1DB13Q2tOuVrA2Bd7tbN53wLAKyw8BqqDCD4ldQdmzK4 Form updated successfully: 1l-iwkMK9nZ77ptnELTuxFp6EnmvJTPlYwdjnzLTXV8o Form updated successfully: 1oz9Ubo2ni1E_I58c0gZyQuDRS5-Q3t5T5s7wLpoY3FA Form updated successfully: 1c7lfXlyC7QUyWLH9NlPSqYb25b0u0ajjYVB8ckwjqg8 Form updated successfully: 1UIyYQ75V0yQhF8pcMUq7CDsl7dFFh8mm7VkrXHvGg5E Form updated successfully: 1qngG5Z9SSGx-4adpb1mTiSrPr9leIJLooSSWrYm1LT8 Form updated successfully: 1I7TJtrzUJ-QI7EreJb8uQbLPnG3Q8vyDxz6kGk8GFuk Form updated successfully: 1yz-Xh6qEApabcSxCOYEwNNb1I1PbU2uLw_EM9xC3CqE Form updated successfully: 1RKqnwF4TFWBA4INU5GAvo04bGd6Fpb_mxd2cgoBOY6M Form updated successfully: 1moV1l7Jp6-nW-NpGKxiFGhPtTUetnahVwzMdrzOLRIo Form updated successfully: 1EcVcMNdgHkyxNKxGxHeTUqrJf6ZvSnBkenxoHJuC0Rs Form updated successfully: 1Il3UiaW42jLiSkQn9Lkimg9u95CDg4o8UfK9pQt7Gs0 Form updated successfully: 1aG4BcuYqWaM_vv_f_hg-hb8qk5i1Ni2HMlZmaOvV2d4 Form updated successfully: 1ZP3ivl1Ia_RrRlrTZKal4Mi93Op3YeweNJIEQMPYxao Form updated successfully: 11mjX9JP8sdhSuZPfUD4NTJbSSwBQr8-GxoG7rjMY3-E Form updated successfully: 1-tmZrTf5ZrGhqE1QAOcNpk42-NBsa-h7wOL_2pd58Ok
In [ ]:
# Muted after creating a sheet with certainty
'''
from google.colab import auth
from google.oauth2 import service_account
from googleapiclient.discovery import build
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
from gspread_dataframe import set_with_dataframe
# Added: representative sheet with certainty access
auth.authenticate_user()
scope = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)
rpt_sheet_cty_url = 'https://docs.google.com/spreadsheets/d/1vzWPqmy_HcM_Ntowg4V5HD307ZGFS0XXBInrXmN-xYo/edit?gid=0#gid=0'
rpt_sheet_cty = client.open_by_url(rpt_sheet_cty_url)
worksheet = rpt_sheet_cty.worksheet("selected_data")
data = worksheet.get_all_records()
df = pd.DataFrame(data)
def replicate_rows_with_certainty(row):
replicated_rows = []
for certainty in [50, 75, 100]:
new_row = row.copy()
new_row['certainty'] = certainty
replicated_rows.append(new_row)
return replicated_rows
transformed_rows = []
for _, row in df.iterrows():
transformed_rows.extend(replicate_rows_with_certainty(row))
transformed_df = pd.DataFrame(transformed_rows)
worksheet.clear()
columns = transformed_df.columns.values.tolist()
worksheet.update([columns] + transformed_df.values.tolist())
transformed_df.head()
'''
Out[ ]:
| NAME | id | GENDER | AGE | RACETHN | EDUCCAT5 | DIVISION | MARITAL_ACS | HHSIZECAT | ... | SEXUALITY | HIV_STAT | PREG_STAT | CC_NUM | cc_encoded | cc_disclosed | NumChronicIllness | AGE_INT | Cluster | certainty | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4775 | Joseph Burton | 4776 | Male | 36 | White non-Hispanic | College grad | West South Central | Now married | 3+ | ... | Heterosexual | negative | Not Applicable | 2.237470e+15 | 1 | 1 | 2 | 30-39 | 0 | 50 |
| 0 | 4775 | Joseph Burton | 4776 | Male | 36 | White non-Hispanic | College grad | West South Central | Now married | 3+ | ... | Heterosexual | negative | Not Applicable | 2.237470e+15 | 1 | 1 | 2 | 30-39 | 0 | 75 |
| 0 | 4775 | Joseph Burton | 4776 | Male | 36 | White non-Hispanic | College grad | West South Central | Now married | 3+ | ... | Heterosexual | negative | Not Applicable | 2.237470e+15 | 1 | 1 | 2 | 30-39 | 0 | 100 |
| 1 | 3539 | Joseph Payne | 3540 | Male | 50 | White non-Hispanic | HS Grad | Middle Atlantic | Now married | 3+ | ... | Heterosexual | negative | Not Applicable | 5.273270e+15 | 1 | 0 | 1 | 50-59 | 0 | 50 |
| 1 | 3539 | Joseph Payne | 3540 | Male | 50 | White non-Hispanic | HS Grad | Middle Atlantic | Now married | 3+ | ... | Heterosexual | negative | Not Applicable | 5.273270e+15 | 1 | 0 | 1 | 50-59 | 0 | 75 |
5 rows × 50 columns
After experiment
In [ ]:
# Temporarily muted
'''
import scipy.stats as stats
anova_df = df_updated[['RACETHN', 'AGE_INT', 'SEXUALITY', 'EDUCCAT5', 'scores']].dropna()
groups = [group['scores'].values for name, group in anova_df.groupby(['EDUCCAT5'])]
# Perform ANOVA
f_stat, p_value = stats.f_oneway(*groups)
print('ANOVA result: F-statistic = {:.2f}, p-value = {:.3f}'.format(f_stat, p_value))
'''
In [ ]:
# df_updated.to_csv('harm3.csv', index=False)