# Frequently used constants
cluster_total = 14
survey_total = 76

from google.colab import auth
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Authenticate and authorize using a service account
auth.authenticate_user()

import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd

scope = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)

spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1CatLlB4axdBW0uJRoioR-x00RsPzQ54CFYq1PqzgV7E/edit?usp=drive_web&ouid=107190373735789167396'
sheet = client.open_by_url(spreadsheet_url)
worksheet = sheet.get_worksheet(0)
data = worksheet.get_all_records()
df = pd.DataFrame(data)

df.head()

# Mute it after creating the representative data set
'''
# Select 152 profiles from each cluster evenly
import pandas as pd
import itertools
from googleapiclient.discovery import build

selected_data = []

for cluster_id in range(cluster_total):
    cluster_data = df[df['Cluster'] == cluster_id]
    selected_cluster_data = []

    if len(selected_cluster_data) < 152:
        selected_cluster_data += cluster_data.sample(n=152 - len(selected_cluster_data), random_state=42).to_dict('records')

    selected_data.extend(selected_cluster_data)

selected_df = pd.DataFrame(selected_data)
'''

for cluster_id in range(cluster_total):
    cluster_data = df[df['Cluster'] == cluster_id]

    # Separate out subsets of interest
    hiv_positive_data = cluster_data[cluster_data['HIV_STAT'] == 'positive']
    pregnant_data = cluster_data[cluster_data['PREG_STAT'] == 'Pregnant']

    # Start building our selection for this cluster in a list
    selection_list = []

    # 1) If HIV+ data exists, pick at least 1 row for sure
    hiv_count = min(4, len(hiv_positive_data))  # up to 4
    if hiv_count > 0:
        selected_hiv = hiv_positive_data.sample(n=hiv_count, random_state=42)
        selection_list.append(selected_hiv)

    # 2) If pregnant data exists, pick at least 1 row for sure
    pregnant_count = min(4, len(pregnant_data))
    if pregnant_count > 0:
        selected_pregnant = pregnant_data.sample(n=pregnant_count, random_state=43)
        # Only add if not the same row (in case one row is both HIV+ & Pregnant):
        if not selected_pregnant.index.isin(selected_hiv.index).all():
            selection_list.append(selected_pregnant)

        # Only add if it's not the exact same row as the HIV+ one selected
        # (In case the same participant is both HIV+ and Pregnant)
        if not selected_pregnant.index.isin(selected_hiv.index).all():
            selection_list.append(selected_pregnant)

    # Convert the forced picks into one DataFrame
    forced_picks = pd.concat(selection_list).drop_duplicates()

    remaining_needed = 152 - forced_picks.shape[0]
    if remaining_needed < 0:
        remaining_needed = 0

    # Exclude forced_picks from the random pool
    leftover_pool = cluster_data.drop(forced_picks.index)

    if leftover_pool.shape[0] < remaining_needed:
        print(f"Not enough rows in cluster {cluster_id} to meet the 152-row requirement.")
        selected_cluster_data = pd.concat([forced_picks, leftover_pool])
    else:
        selected_cluster_data = pd.concat([
            forced_picks,
            leftover_pool.sample(n=remaining_needed, random_state=44)
        ])

    # Finally extend your overall selection
    selected_data.extend(selected_cluster_data.to_dict('records'))

#see all HIV positive people
selected_df[selected_df['PREG_STAT'] == 'Negative']

# Mute it after creating the representative sheet
'''
new_sheet_title = 'representative_set'
new_spreadsheet = client.create(new_sheet_title)
new_worksheet = new_spreadsheet.get_worksheet(0)
new_worksheet.update_title('selected_data')
new_worksheet.update([selected_df.columns.values.tolist()] + selected_df.values.tolist())
spreadsheet_id = new_spreadsheet.id
drive_service = build('drive', 'v3', credentials=creds)

# Move the spreadsheet to the folder
try:
    drive_service.files().update(
        fileId=spreadsheet_id,
        addParents='1uJa_ZyNHqZn-YBju4kRre0HBfbcUWCSQ',
        removeParents='root',
        fields='id, parents'
    ).execute()
    print(f"New sheet '{new_sheet_title}' created and moved to the folder.")
except Exception as e:
    print(f"An error occurred while moving the file: {e}")
'''

New sheet 'representative_set' created and moved to the folder.

# Added: representative sheet access
rpt_sheet_url = 'https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=0#gid=0'
rpt_sheet = client.open_by_url(rpt_sheet_url)

worksheet = rpt_sheet.get_worksheet(0)
data = worksheet.get_all_records()
df = pd.DataFrame(data)

df.head()

from google.colab import drive
drive.mount('/content/drive')

# Initialize a set to store unique IDs
unique_ids = set()

Mounted at /content/drive

import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import random

selected_ids = []

# Mute it after creating worksheets
'''
# Added: Create 76 surveys, with each containing profiles of different clusters
import time

main_worksheet = rpt_sheet.worksheet("selected_data")
all_data = main_worksheet.get_all_records()

df = pd.DataFrame(all_data)

selected_indices = set()

for survey_num in range(survey_total):
    survey_sheet_name = f"Quiz_{survey_num + 1}"
    survey_data = []

    try:
        try:
            survey_worksheet = rpt_sheet.worksheet(survey_sheet_name)
            survey_worksheet.clear()
        except gspread.exceptions.WorksheetNotFound:
            rpt_sheet.add_worksheet(title=survey_sheet_name, rows="15", cols="20")
            survey_worksheet = rpt_sheet.worksheet(survey_sheet_name)

        survey_data = []
        for cluster_id in range(cluster_total):
            cluster_data = df[df['Cluster'] == cluster_id]
            available_rows = cluster_data[~cluster_data.index.isin(selected_indices)]
            if len(available_rows) >= 2:
                selected_rows = available_rows.sample(n=2)
                survey_data.append(selected_rows)
                selected_indices.update(selected_rows.index)
            elif not available_rows.empty:
                selected_row = available_rows.sample(n=1)
                survey_data.append(selected_row)
                selected_indices.add(selected_row.index[0])

        header_row = df.columns.tolist()
        all_sheet_data = [header_row] + [list(row) for row in pd.concat(survey_data).itertuples(index=False, name=None)]

        survey_worksheet.update('A1', all_sheet_data)

    except gspread.exceptions.APIError as e:
        print(f"API Error for sheet {survey_sheet_name}: {e}")
        # Add program delay to avoid exceeding API limit
        time.sleep(5)
        continue

    except Exception as e:
        print(f"Unexpected error for sheet {survey_sheet_name}: {e}")
        break
    # Program delay
    time.sleep(2)

print("Worksheet created.")
'''

<ipython-input-23-6fb946d17020>:41: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  survey_worksheet.update('A1', all_sheet_data)

Worksheet created.

import time
import gspread
import pandas as pd

initial_information = ["NAME", "id"]
base_features = [
    "GENDER", "RACETHN", "EDUCCAT5", "DIVISION", "MARITAL_ACS",
    "CHILDRENCAT", "CITIZEN_REC", "BORN_ACS", "AGE_INT"
]
health_features = ["HIV_STAT", "PREG_STAT", "NumChronicIllness"]
finance_features = ["FAMINC5", "CC_NUM", "FDSTMP_CPS"]
sensitive_features = ["SEXUALITY", "OWNGUN_GSS", "RELIGCAT"]

def delete_unnecessary_columns(sheet_name, visible_columns, condition_label):
    worksheet = rpt_sheet.worksheet(sheet_name)
    data = worksheet.get_all_records()
    df = pd.DataFrame(data)

    df['Condition'] = condition_label

    if 'Condition' not in visible_columns:
        visible_columns.append('Condition')

    columns_to_keep = [col for col in visible_columns if col in df.columns]

    if columns_to_keep:
        print(f"Sheet '{sheet_name}' - Unnecessary columns discovered and will be deleted.")
        df = df[columns_to_keep]
        worksheet.clear()
        worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())
    else:
        print(f"Sheet '{sheet_name}' - Unnecessary columns may have already been deleted.")

for survey_num in range(survey_total):
    survey_sheet_name = f"Quiz_{survey_num + 1}"

    if survey_num <= 18:
        condition_label = "Control"
        visible_columns = initial_information + base_features
    elif survey_num <= 37:
        condition_label = "Health"
        visible_columns = initial_information + base_features + health_features
    elif survey_num <= 56:
        condition_label = "Finance"
        visible_columns = initial_information + base_features + finance_features
    else:
        condition_label = "Sensitive"
        visible_columns = initial_information + base_features + sensitive_features

    try:
        delete_unnecessary_columns(survey_sheet_name, visible_columns, condition_label)

        time.sleep(2)
    except gspread.exceptions.APIError as e:
        if 'RATE_LIMIT_EXCEEDED' in str(e):
            print("Rate limit exceeded. Waiting for 5 seconds before retrying...")
            time.sleep(5)
            delete_unnecessary_columns(survey_sheet_name, visible_columns, condition_label)
        else:
            print(f"Unexpected error while processing {survey_sheet_name}: {e}")
            break

print("Necessary columns have been selected.")

Sheet 'Quiz_1' - Unnecessary columns discovered and will be deleted.

<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())

Sheet 'Quiz_2' - Unnecessary columns discovered and will be deleted.

<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())

Sheet 'Quiz_3' - Unnecessary columns discovered and will be deleted.

<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())

Sheet 'Quiz_4' - Unnecessary columns discovered and will be deleted.

<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())

Sheet 'Quiz_5' - Unnecessary columns discovered and will be deleted.

<ipython-input-25-b00a5a2258ad>:30: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  worksheet.update('A1', [df.columns.tolist()] + df.values.tolist())

# Add corresponding conditions of each profile in every survey within representative data set to the last column
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import time

scope = ['https://www.googleapis.com/auth/spreadsheets',
         'https://www.googleapis.com/auth/drive.file',
         'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)

rpt_sheet_url = 'https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=352691556#gid=352691556'
sheet = client.open_by_url(rpt_sheet_url)

sheet_names = [f"Quiz_{i}" for i in range(1, 41)]

for sheet_name in sheet_names:
    worksheet = sheet.worksheet(sheet_name)

    data = worksheet.get_all_records()

    df = pd.DataFrame(data)

    if 'Condition' not in df.columns:
        if 1 <= int(sheet_name.split('_')[1]) <= 10:
            df['Condition'] = 'Base'
        elif 11 <= int(sheet_name.split('_')[1]) <= 20:
            df['Condition'] = 'Base + Health'
        elif 21 <= int(sheet_name.split('_')[1]) <= 30:
            df['Condition'] = 'Base + Finance'
        elif 31 <= int(sheet_name.split('_')[1]) <= 40:
            df['Condition'] = 'Base + Sensitive'

        header = df.columns.tolist()
        worksheet.update([header] + df.values.tolist())
        print(f"Updated {sheet_name} with 'Condition' column.")

    time.sleep(2)

print("Conditions have been added to each profile in every survey.")

# Integrating each profile and condition to another sheet called integration.
# It moves values intermittently to minimize the probability of API runtime error occurrence.
import time
import gspread
from oauth2client.service_account import ServiceAccountCredentials

scope = ['https://www.googleapis.com/auth/spreadsheets',
         'https://www.googleapis.com/auth/drive.file',
         'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)

rpt_sheet_url = 'https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=352691556#gid=352691556'
spreadsheet = client.open_by_url(rpt_sheet_url)

try:
    integration_sheet = spreadsheet.add_worksheet(title="integration", rows="1000", cols="26")  # Default 26 columns for flexibility
except Exception as e:
    integration_sheet = spreadsheet.worksheet("integration")

integration_sheet.update("A1:B1000", [[""]] * 1000)
integration_sheet.update('A1', [["id", "Condition"]])

row_index = 2

for i in range(66, 77): # If API runtime error occurs, delete the incomplete area and adjust the range
    sheet_name = f"Quiz_{i}"
    quiz_sheet = spreadsheet.worksheet(sheet_name)
    data = quiz_sheet.get_all_values()[1:]
    rows_to_add = []
    for row in data:
        rows_to_add.append([row[1], row[-1]])
    cell_range = f"A{row_index}:B{row_index + len(rows_to_add) - 1}"
    try:
        integration_sheet.update(cell_range, rows_to_add)
    except Exception as e:
        time.sleep(5)
        integration_sheet.update(cell_range, rows_to_add)
    row_index += len(rows_to_add)
    time.sleep(1)

<ipython-input-31-4840b0e9d117>:21: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  integration_sheet.update("A1:B1000", [[""]] * 1000)
<ipython-input-31-4840b0e9d117>:22: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  integration_sheet.update('A1', [["id", "Condition"]])
<ipython-input-31-4840b0e9d117>:35: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  integration_sheet.update(cell_range, rows_to_add)

# Add corresponding conditions of each profile in Sheet 3 of update counter file to the last column
# Changing less than 5 surveys each time running can minimize API error probability
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time

scope = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)

rpt_sheet_url = 'https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=0#gid=0'
update_sheet_url = 'https://docs.google.com/spreadsheets/d/1OSoeG07uhYDsYEUP38Rc6bMwJySvflqGWfryTjx1gIo/edit?gid=2023416044#gid=2023416044'

rpt_sheet = client.open_by_url(rpt_sheet_url)
update_sheet = client.open_by_url(update_sheet_url).worksheet("Sheet4")

integration_sheet = rpt_sheet.worksheet("integration")

integration_data = integration_sheet.get_all_values()[1:]
integration_dict = {str(row[0]): row[1] for row in integration_data}

sheet3_data = update_sheet.get_all_values()
header = sheet3_data[0]
rows = sheet3_data[1:]

if 'Condition' not in header:
    header.append('Condition')
    update_sheet.update('A1', [header])

updated_rows = []
for row in rows:
    person_id = str(row[0])
    condition = integration_dict.get(person_id, 'Unknown')
    if len(row) < len(header):
        row.append(condition)
    else:
        row[-1] = condition
    updated_rows.append(row)

update_sheet.update(f"A2:{chr(64 + len(header))}{len(rows) + 1}", updated_rows)

print("Conditions have been added to the Sheet4 of update counter table.")

<ipython-input-4-9c58a3e09918>:28: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  update_sheet.update('A1', [header])
<ipython-input-4-9c58a3e09918>:40: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  update_sheet.update(f"A2:{chr(64 + len(header))}{len(rows) + 1}", updated_rows)

Conditions have been added to the Sheet4 of update counter table.

def create_description(data_row):
    # Basic features
    name = data_row['NAME']
    id = data_row['id']

    gender = data_row['GENDER'].lower()
    if gender == 'male':
        pronoun = 'He'
        pronoun2 = 'His'
    else:
        pronoun = 'She'
        pronoun2 = 'Her'

    racethn = data_row['RACETHN']
    if racethn == "Other race":
        racethn = "unidentified"

    educcat5 = data_row['EDUCCAT5'].lower()
    if educcat5 == 'less than hs':
        educcat5 = 'a less than high school'
    elif educcat5 == 'hs grad':
        educcat5 = 'a high school graduate'
    elif educcat5 == 'some college':
        educcat5 = 'some college'
    elif educcat5 == 'College grad':
        educcat5 = 'a college graduate'
    else:
        educcat5 = 'a postgraduate'

    marital_acs = data_row['MARITAL_ACS'].lower()
    if marital_acs == 'never married':
        marital_acs = 'has never married'
    elif marital_acs == 'now married':
        marital_acs = 'is now married'
    else:
        marital_acs = 'is divorced'

    childrencat = data_row['CHILDRENCAT'].lower()
    if childrencat == "no children":
        childrencat = "doesn't have"
    else:
        childrencat = "has"

    born_acs = data_row['BORN_ACS']
    if born_acs == 'Inside the United States':
      born_acs = 'inside the U.S.'
    else:
      born_acs = 'outside the U.S.'

    age_int = data_row['AGE_INT']

    citizen_rec = data_row['CITIZEN_REC'].lower()
    if citizen_rec == 'Yes, a U.S. citizen':
        citizen_rec = "US citizen"
    else:
        citizen_rec = "non-US citizen"

    division = data_row['DIVISION']

    division_to_common_name = {
    'Pacific': 'the West Coast',
    'Mountain': 'the Mountain States',
    'West North Central': 'the Upper Midwest',
    'West South Central': 'the South Central',
    'East North Central': 'the Great Lakes region',
    'East South Central': 'the Deep South',
    'South Atlantic': 'the South',
    'Middle Atlantic': 'the Northeast',
    'New England': 'New England'
}

    if division in division_to_common_name:
        division = division_to_common_name[division]
    else:
        division = ""

    base_description = (
        f"{name} has the following attributes:\r"
        f"• {pronoun2} age is between {age_int} years.\r"
        f"• {pronoun} is of {racethn} descent.\r"
        f"• {pronoun2} gender is {gender}.\r"
        f"• {pronoun} has {educcat5} level education.\r"
        f"• {pronoun} {marital_acs}.\r"
        f"• {pronoun} {childrencat} children.\r"
        f"• {pronoun} is a {citizen_rec}.\r"
        f"• {pronoun} was born {born_acs}\r"
        f"• {pronoun} resides in {division} region of the U.S.\r"
    )

    # Health features
    hiv = data_row.get('HIV_STAT', None)
    if hiv:
        hiv = hiv.lower()

    pregnancy = data_row.get('PREG_STAT', None)
    if pregnancy:
        if gender == 'male':
          pregnancy = ""
        else:
            pregnancy = data_row.get('PREG_STAT', None)
            if pregnancy == 'Positive':
                pregnancy = "• She is pregnant.\r"
            else:
                pregnancy = "• She is not pregnant.\r"

    num_chronic_illness = data_row.get('NumChronicIllness', None)
    if isinstance(num_chronic_illness, (int, float)):
        num_chronic_illness = str(num_chronic_illness)
        num_chronic_illness = num_chronic_illness.lower()
        if num_chronic_illness == '0':
            chronill = 'not chronically ill'
        else:
            chronill = 'chronically ill'

    if hiv and num_chronic_illness:
        health_description = (
            f"• {pronoun2} HIV status is {hiv}.\r"
            f"{pregnancy}"
            f"• {pronoun} is {chronill}.\r"
        )
    else:
        health_description = ""

    # finance features
    faminc5 = data_row.get('FAMINC5', None)
    if faminc5:
        faminc5.lower()
        if faminc5 == '$20K to less than $40K':
            faminc5 = 'between $20K to less than $40K'
        elif faminc5 == '$40K to less than $75K':
            faminc5 = 'between $40K to less than $75K'
        elif faminc5 == '$75K to less than $150K':
            faminc5 = 'between $75K to less than $150K'
        else:
            None

    cred = data_row.get('CC_NUM', None)
    if cred:
        if isinstance(cred, (int, float)):
            cred = str(cred)
        cred = cred.lower()
        if cred != 0:
            cred = "has"
        else:
            cred = "does not have"

    fdstmp_cps =  data_row.get('FDSTMP_CPS', None)
    if fdstmp_cps == 'yes':
        fdstmp_cps = "currently receives foodstamps"
    else:
        fdstmp_cps = "does not currently receive foodstamps"

    if faminc5 and cred and fdstmp_cps:
        finance_description = (
            f"• {pronoun2} family's annual income is {faminc5}.\r"
            f"• {pronoun} {cred} a credit card.\r"
            f"• {pronoun} {fdstmp_cps}.\r"
        )
    else:
        finance_description = ""

    # sensitive features
    sexuality = data_row.get('SEXUALITY', None)
    if sexuality:
        sexuality = sexuality.lower()

    owngun_gss = data_row.get('OWNGUN_GSS', None)
    if owngun_gss == 'Yes':
        owngun_gss = "owns a gun"
    else:
        owngun_gss = "doesn't own a gun"

    religcat = data_row.get('RELIGCAT', None)

    if sexuality and religcat and owngun_gss:
        sensitive_description = (
            f"• {pronoun} is {sexuality}.\r"
            f"• {pronoun} {owngun_gss}.\r"
            f"• {pronoun} is {religcat}.\r"
        )
    else:
      sensitive_description = ""

    description = base_description + health_description + finance_description + sensitive_description

    return description

# Mute it after creating forms and URLs
'''
from google.oauth2 import service_account
from googleapiclient.discovery import build
import gspread

# Authentication and service setup
service_account_key_path = 'credentials.json'
credentials = service_account.Credentials.from_service_account_file(
    service_account_key_path,
    scopes=['https://www.googleapis.com/auth/drive', 'https://www.googleapis.com/auth/spreadsheets']
)
client = gspread.authorize(credentials)
forms_service = build("forms", "v1", credentials=credentials)

# Open the source and destination spreadsheets
source_spreadsheet = client.open_by_url('https://docs.google.com/spreadsheets/d/1OGcWVmFb5ulAp9UEi8BV5zShPU3SP-gSEjhSllZQqb0/edit?gid=0#gid=0')
destination_spreadsheet = client.open_by_url('https://docs.google.com/spreadsheets/d/12fMUJYC8ZpayamsyzzpcRK8XJLE2WU-spBYLOTr-jBs/edit?gid=0#gid=0')
summary_worksheet = destination_spreadsheet.sheet1

# Prepare the destination worksheet
summary_worksheet.update('A1:B1', [["Quiz Name", "Form URL"]])

# Iterate through each survey, create forms, and log URLs
for i, worksheet in enumerate(source_spreadsheet.worksheets()[1:]):
    survey_name = worksheet.title
    form_body = {"info": {"title": f"Harm Scores Survey: {survey_name}"}}
    result = forms_service.forms().create(body=form_body).execute()
    form_id = result["formId"]
    form_url = f"https://docs.google.com/forms/d/{form_id}/edit"
    summary_worksheet.append_row([survey_name, form_url])

print("All surveys and form URLs have been logged.")
'''

<ipython-input-36-32025a29155f>:22: DeprecationWarning: The order of arguments in worksheet.update() has changed. Please pass values first and range_name secondor used named arguments (range_name=, values=)
  summary_worksheet.update('A1:B1', [["Quiz Name", "Form URL"]])

All surveys and form URLs have been logged.

import re
import pandas as pd
import time

summary_worksheet = destination_spreadsheet.worksheet('Sheet1')
form_urls_records = summary_worksheet.get_all_records()

for record in form_urls_records:
    quiz_name = record['Quiz Name']
    form_url = record['Form URL']

    form_id_match = re.search(r'/d/(.*?)/edit', form_url)
    if form_id_match:
        form_id = form_id_match.group(1)
        matching_worksheet = source_spreadsheet.worksheet(quiz_name)
        # Assuming each row in the worksheet is a person's information
        person_records = matching_worksheet.get_all_records()
        if not person_records:
            print(f"No data found in the worksheet for quiz: {quiz_name}")
            continue

        df_worksheet = pd.DataFrame(person_records)

        df_worksheet['Description'] = ''

        for index, row in df_worksheet.iterrows():
            full_description = create_description(row)
            df_worksheet.at[index, 'Description'] = full_description

        update_requests = []

        first_request = {
            "createItem": {
                "item": {
                    "textItem": {},
                    "title": f"We are trying to quantify harm from the release of personal information. Please answer each question to the best of your abilities. For each subject, read the entire profile—sometimes, the harm might (or might not) be context-dependent.\
                              \r\rAll of the people included in this survey are fake (synthetic)—we are not showing you real data about real people. This study is IRB approved."
                },
                "location": {"index": 0},
            },
        }
        update_requests.append(first_request)

        break_request = {
            "createItem": {
                "item": {
                    "pageBreakItem": {},
                  },
                  "location": {"index": 1},
            },
        }

        update_requests.append(break_request)

        prolific_id =  {
             "createItem": {
                 "item": {
                    "title": "What is your Prolific ID?",
                       "questionItem": {
                            "question": {
                                "required": True,
                                "textQuestion": {
                                },

                            },
                        },
                    },
                    "location": {"index": 2},
                },
            }

        update_requests.append(prolific_id)

        break_request = {
            "createItem": {
                "item": {
                    "pageBreakItem": {},
                  },
                  "location": {"index": 3},
            },
        }

        update_requests.append(break_request)

        question_number = 1
        location_index = 4
        row_count = 0

        for index, row in df_worksheet.iterrows():
            row_count += 1
            description_value = row['Description']
            id = row['id']
            description_request = {
                "createItem": {
                    "item": {
                        "textItem": {},
                        "title": f"Sample person {question_number}\r\r{description_value}"
                    },
                    "location": {"index": location_index},
                },
            }

            update_requests.append(description_request)
            question_number += 1
            location_index += 1

            attention_check =  {
                "createItem": {
                    "item": {
                        "title": "What is this person's ID number? (Please select the given answer; this question is purely for data processing purposes.)",
                        "questionItem": {
                            "question": {
                                "required": True,
                                "choiceQuestion": {
                                    "type": "RADIO",
                                    "options": [
                                        {"value": f"{id}"},
                                    ],
                                },
                            },
                        },
                    },
                    "location": {"index": location_index},
                },
            }

            update_requests.append(attention_check)
            location_index += 1

            if row_count == 10:
                attention_check2 = {
                "createItem": {
                    "item": {
                        "title": "What do you hope elected leaders in Washington do in the next year? This is an attention check question.",
                          "questionItem": {
                                "question": {
                                    "required": True,
                                    "textQuestion": {
                                    },

                                },
                            },
                        },
                        "location": {"index": location_index},
                    },
                }
                update_requests.append(attention_check2)
                location_index += 1

            else:
                attention_check2 = {}

            if row_count == 20:
                attention_check3 =  {
                "createItem": {
                    "item": {
                        "title": "To ensure that you have read this question carefully, please type “Green” as your answer. This is an attention check question.",
                          "questionItem": {
                                "question": {
                                    "required": True,
                                    "textQuestion": {
                                    },

                                },
                            },
                        },
                        "location": {"index": location_index},
                    },
                }
                update_requests.append(attention_check3)
                location_index += 1

            else:
                attention_check3 = {}

            name = row['NAME']
            gender = row['GENDER'].lower()
            if gender == 'male':
                pronoun2 = 'his'
            else:
                pronoun2 = 'her'

            q1_request = {
                "createItem": {
                    "item": {
                        "title": f"How harmful do you think most people in the U.S. would consider it if hackers or cybercriminals stole {name}'s personal data in a data breach?",
                        "questionItem": {
                            "question": {
                                "required": True,
                                "choiceQuestion": {
                                    "type": "RADIO",
                                    "options": [
                                        {"value": 'Not at all harmful'},
                                        {"value": 'Slightly harmful'},
                                        {"value": 'Moderately harmful'},
                                        {"value": 'Very harmful'},
                                        {"value": 'Extremely harmful'},
                                    ],
                                },
                            },
                        },
                    },
                    "location": {"index": location_index},
                },
            }
            update_requests.append(q1_request)
            location_index += 1

            q2_request = {
                "createItem": {
                    "item": {
                        "title": f"How harmful do you think most people in the U.S. would consider it if government agencies accessed {name}'s personal information without {pronoun2} knowledge or consent?",
                        "questionItem": {
                            "question": {
                                "required": True,
                                "choiceQuestion": {
                                    "type": "RADIO",
                                    "options": [
                                        {"value": 'Not at all harmful'},
                                        {"value": 'Slightly harmful'},
                                        {"value": 'Moderately harmful'},
                                        {"value": 'Very harmful'},
                                        {"value": 'Extremely harmful'},
                                    ],
                                },
                            },
                        },
                    },
                    "location": {"index": location_index},
                },
            }
            update_requests.append(q2_request)
            location_index += 1

            q3_request = {
                "createItem": {
                    "item": {
                        "title": f"How harmful do you think most people in the U.S. would consider it if a corporation collected and used {name}'s personal data for {pronoun2} own purposes without {pronoun2} consent?",
                        "questionItem": {
                            "question": {
                                "required": True,
                                "choiceQuestion": {
                                    "type": "RADIO",
                                    "options": [
                                        {"value": 'Not at all harmful'},
                                        {"value": 'Slightly harmful'},
                                        {"value": 'Moderately harmful'},
                                        {"value": 'Very harmful'},
                                        {"value": 'Extremely harmful'},
                                    ],
                                },
                            },
                        },
                    },
                    "location": {"index": location_index},
                },
            }
            update_requests.append(q3_request)
            location_index += 1

            q4_request = {
                "createItem": {
                    "item": {
                        "title": f"How harmful do you think most people in the U.S. would consider it if {name}'s employer or colleagues viewed {pronoun2} personal data without {pronoun2} permission?",
                        "questionItem": {
                            "question": {
                                "required": True,
                                "choiceQuestion": {
                                    "type": "RADIO",
                                    "options": [
                                        {"value": 'Not at all harmful'},
                                        {"value": 'Slightly harmful'},
                                        {"value": 'Moderately harmful'},
                                        {"value": 'Very harmful'},
                                        {"value": 'Extremely harmful'},
                                    ],
                                },
                            },
                        },
                    },
                    "location": {"index": location_index},
                },
            }
            update_requests.append(q4_request)
            location_index += 1

            q5_request = {
                "createItem": {
                    "item": {
                        "title": f"How harmful do you think most people in the U.S. would consider it if {name}'s personal information was inadvertently shared with {pronoun2} family members without {pronoun2} consent?",
                        "questionItem": {
                            "question": {
                                "required": True,
                                "choiceQuestion": {
                                    "type": "RADIO",
                                    "options": [
                                        {"value": 'Not at all harmful'},
                                        {"value": 'Slightly harmful'},
                                        {"value": 'Moderately harmful'},
                                        {"value": 'Very harmful'},
                                        {"value": 'Extremely harmful'},
                                    ],
                                },
                            },
                        },
                    },
                    "location": {"index": location_index},
                },
            }
            update_requests.append(q5_request)
            location_index += 1

            q6_request = {
                "createItem": {
                    "item": {
                        "title": f"How harmful do you think most people in the U.S. would consider it if personal details about {name} were disclosed to {pronoun2} close friends without {pronoun2} knowledge?",
                        "questionItem": {
                            "question": {
                                "required": True,
                                "choiceQuestion": {
                                    "type": "RADIO",
                                    "options": [
                                        {"value": 'Not at all harmful'},
                                        {"value": 'Slightly harmful'},
                                        {"value": 'Moderately harmful'},
                                        {"value": 'Very harmful'},
                                        {"value": 'Extremely harmful'},
                                    ],
                                },
                            },
                        },
                    },
                    "location": {"index": location_index},
                },
            }
            update_requests.append(q6_request)
            location_index += 1

            q7_request = {
                "createItem": {
                    "item": {
                        "title": f"How harmful do you think most people in the U.S. would consider it if {name}'s personal data was accessed by acquaintances who know {name} by face and name but don't interact with {name} regularly?",
                        "questionItem": {
                            "question": {
                                "required": True,
                                "choiceQuestion": {
                                    "type": "RADIO",
                                    "options": [
                                        {"value": 'Not at all harmful'},
                                        {"value": 'Slightly harmful'},
                                        {"value": 'Moderately harmful'},
                                        {"value": 'Very harmful'},
                                        {"value": 'Extremely harmful'},
                                    ],
                                },
                            },
                        },
                    },
                    "location": {"index": location_index},
                },
            }
            update_requests.append(q7_request)
            location_index += 1

            q8_request = {
                "createItem": {
                    "item": {
                        "title": f"How harmful do you think most people in the U.S. would consider it if {name}'s personal information was made publicly available on the internet for anyone to access without {pronoun2} consent?",
                        "questionItem": {
                            "question": {
                                "required": True,
                                "choiceQuestion": {
                                    "type": "RADIO",
                                    "options": [
                                        {"value": 'Not at all harmful'},
                                        {"value": 'Slightly harmful'},
                                        {"value": 'Moderately harmful'},
                                        {"value": 'Very harmful'},
                                        {"value": 'Extremely harmful'},
                                    ],
                                },
                            },
                        },
                    },
                    "location": {"index": location_index},
                },
            }
            update_requests.append(q8_request)
            location_index += 1

            break_request = {
                "createItem": {
                    "item": {
                        "pageBreakItem": {},
                    },
                    "location": {"index": location_index},
                },
            }
            update_requests.append(break_request)
            location_index += 1

            update = {"requests": update_requests}

        last_request = {
            "createItem": {
                "item": {
                    "textItem": {},
                    "title": f"Prolific code: COLYGFMB"
                },
                "location": {"index": location_index},
            },
        }
        update_requests.append(last_request)
        location_index += 1

        try:
          t = "Harm Scores Survey"
          forms_service.forms().batchUpdate(formId=form_id, body={"requests": update_requests}).execute()

          drive_service = build('drive', 'v3', credentials=credentials)
          # Temporarily muted

          drive_service.permissions().create(
              fileId=form_id,
              body={
                  'role': 'writer',
                  'type': 'user',
                  'emailAddress': 'shreya.kochar1@gmail.com'
              }
          ).execute()

          print(f"Form updated successfully: {form_id}")
        except Exception as e:
          print(f"Unable to update form {form_id}: {e}")
    else:
        print(f"Unable to extract form ID from URL: {form_url}")
    # Program delay
    time.sleep(2)

Form updated successfully: 1Wrfzzm8GeDNKBQsEH4h2UUtzRzhSmj1c02aQaJkBI6s
Form updated successfully: 17seIXv0sOCcGT2s4Ah2mkkL086vRWVaf6kaXqXl-Dzw
Form updated successfully: 1JHYAiMRe_gLQyNFn4U22cA0WuuBTFLfsvAKMgSmoLXg
Form updated successfully: 1hTA4DRe7s89xwvADnZLNti814zsC0K-PgLB4n8GoUpI
Form updated successfully: 1EGnzEvl-Y6oDOUlX1sLYjLjK3QB6DVM5NrlwfAD9l2w
Form updated successfully: 1Tvj4A_kbEieqUSaLEAPSXyijQKp-RyDQFqDIiaya1OE
Form updated successfully: 1nq3rp6okjDp6-ykr3Cb4Sr-eAzlXJ2bZUMJJrpULUbg
Form updated successfully: 1KWFeBj4yxaSSXhnUsoj38UJMx-4qgHclvhNn2JmdaaE
Form updated successfully: 1aJ2IgCSh3pajXpxPOeXSfpJ-gnYcH2RAZxT9XskRC54
Form updated successfully: 1Nipu71J8H9dKOfFAnGmT18MFV_hQBxXMjkbD_PgJY3Y
Form updated successfully: 113ApJyceab8b6rGq0aoQXDeF7zWUHKMT_J41PFGlfvs
Form updated successfully: 1Pp6X8sQgno5JAlboTJcim7qzgQ7CuK5K2STIQuuf2-s
Form updated successfully: 16dzWs1GBFdESLi9dI2yx6siITik0JbrRKwdD9LIfMbs
Form updated successfully: 1Y6mKTuHwq436qryhsQzhJTErOclcRpP2eFsaGA8c6e4
Form updated successfully: 17VT1pAC2suPtlLm0KzxUI9W_Re8nX4AzMKuakYLGAR0
Form updated successfully: 1jBINeal587U1WvF7McXOU6DE0SFEcLfQmql5okX3KRA
Form updated successfully: 1S5DBWZIsenzzlR8oCmCXI7Z4ewWafCgfA6rwllsb_24
Form updated successfully: 1bIBjipXlppgfcqJ085Fz1zoBANJh_50uw_G2FRHj5AM
Form updated successfully: 1CRLdS9tFu1qRWhJORYRlq9_rygBlqAvpKFSaXnQMMyY
Form updated successfully: 168x9HBGV_n7dy6WFdJxv4fYtv5NHkMG7VXJZmCdQQW0
Form updated successfully: 1ataLuMJynbE0rCed_ozMFwX-mF4MPjpBr0EUyhi_WGU
Form updated successfully: 1utzIm5L1MSH8S98y7xjZffgFk0-qxUJEfeBEqUsVDpU
Form updated successfully: 1xZVOjUN87a4grVetzXs61Glg0PhCbY1hDae_GjnIFeI
Form updated successfully: 1pocKFVmAHDqYVvpww3Xan8eK0Af087AQVTX4Gcdf2P0
Form updated successfully: 1gcx-4wyNLQr8samszcZwcG8zqwHGgv3oZAYJ6Ewzz3A
Form updated successfully: 1DB13Q2tOuVrA2Bd7tbN53wLAKyw8BqqDCD4ldQdmzK4
Form updated successfully: 1l-iwkMK9nZ77ptnELTuxFp6EnmvJTPlYwdjnzLTXV8o
Form updated successfully: 1oz9Ubo2ni1E_I58c0gZyQuDRS5-Q3t5T5s7wLpoY3FA
Form updated successfully: 1c7lfXlyC7QUyWLH9NlPSqYb25b0u0ajjYVB8ckwjqg8
Form updated successfully: 1UIyYQ75V0yQhF8pcMUq7CDsl7dFFh8mm7VkrXHvGg5E
Form updated successfully: 1qngG5Z9SSGx-4adpb1mTiSrPr9leIJLooSSWrYm1LT8
Form updated successfully: 1I7TJtrzUJ-QI7EreJb8uQbLPnG3Q8vyDxz6kGk8GFuk
Form updated successfully: 1yz-Xh6qEApabcSxCOYEwNNb1I1PbU2uLw_EM9xC3CqE
Form updated successfully: 1RKqnwF4TFWBA4INU5GAvo04bGd6Fpb_mxd2cgoBOY6M
Form updated successfully: 1moV1l7Jp6-nW-NpGKxiFGhPtTUetnahVwzMdrzOLRIo
Form updated successfully: 1EcVcMNdgHkyxNKxGxHeTUqrJf6ZvSnBkenxoHJuC0Rs
Form updated successfully: 1Il3UiaW42jLiSkQn9Lkimg9u95CDg4o8UfK9pQt7Gs0
Form updated successfully: 1aG4BcuYqWaM_vv_f_hg-hb8qk5i1Ni2HMlZmaOvV2d4
Form updated successfully: 1ZP3ivl1Ia_RrRlrTZKal4Mi93Op3YeweNJIEQMPYxao
Form updated successfully: 11mjX9JP8sdhSuZPfUD4NTJbSSwBQr8-GxoG7rjMY3-E
Form updated successfully: 1-tmZrTf5ZrGhqE1QAOcNpk42-NBsa-h7wOL_2pd58Ok

# Muted after creating a sheet with certainty
'''
from google.colab import auth
from google.oauth2 import service_account
from googleapiclient.discovery import build
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
from gspread_dataframe import set_with_dataframe

# Added: representative sheet with certainty access
auth.authenticate_user()
scope = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)
client = gspread.authorize(creds)
rpt_sheet_cty_url = 'https://docs.google.com/spreadsheets/d/1vzWPqmy_HcM_Ntowg4V5HD307ZGFS0XXBInrXmN-xYo/edit?gid=0#gid=0'
rpt_sheet_cty = client.open_by_url(rpt_sheet_cty_url)
worksheet = rpt_sheet_cty.worksheet("selected_data")
data = worksheet.get_all_records()
df = pd.DataFrame(data)

def replicate_rows_with_certainty(row):
    replicated_rows = []
    for certainty in [50, 75, 100]:
        new_row = row.copy()
        new_row['certainty'] = certainty
        replicated_rows.append(new_row)
    return replicated_rows

transformed_rows = []
for _, row in df.iterrows():
    transformed_rows.extend(replicate_rows_with_certainty(row))

transformed_df = pd.DataFrame(transformed_rows)

worksheet.clear()
columns = transformed_df.columns.values.tolist()
worksheet.update([columns] + transformed_df.values.tolist())

transformed_df.head()
'''

# Temporarily muted
'''
import scipy.stats as stats

anova_df = df_updated[['RACETHN', 'AGE_INT', 'SEXUALITY', 'EDUCCAT5', 'scores']].dropna()
groups = [group['scores'].values for name, group in anova_df.groupby(['EDUCCAT5'])]

# Perform ANOVA
f_stat, p_value = stats.f_oneway(*groups)
print('ANOVA result: F-statistic = {:.2f}, p-value = {:.3f}'.format(f_stat, p_value))
'''

# df_updated.to_csv('harm3.csv', index=False)

		NAME	id	GENDER	AGE	RACETHN	EDUCCAT5	DIVISION	MARITAL_ACS	HHSIZECAT	...	CI_LABEL_OWNGUN_GSS	CI_LABEL_SEXUALITY	CI_LABEL_HIV_STAT	CI_LABEL_PREG_STAT	CI_LABEL_CC_NUM	CI_LABEL_cc_encoded	CI_LABEL_cc_disclosed	CI_LABEL_NumChronicIllness	AGE_INT	Cluster
0	0	Luke Walsh	1	Male	25	White non-Hispanic	Some college	Mountain	Never married	3+	...	is	is	is probably	is probably	is	is probably	is possibly	is probably	20-29	5
1	1	Matilde Izaguirre Checa	2	Female	70	Hispanic	HS Grad	West South Central	Divorced	1	...	is probably	is possibly	is	is	is	is	is	is possibly	70-79	11
2	2	Ryan Smith	3	Male	85	White non-Hispanic	Less than HS	Middle Atlantic	Now married	2	...	is possibly	is probably	is probably	is	is probably	is probably	is probably	is probably	80-89	2
3	3	Matthew Grimes	4	Male	59	White non-Hispanic	HS Grad	Mountain	Now married	2	...	is probably	is probably	is probably	is	is possibly	is probably	is	is probably	50-59	12
4	4	Miraan Rama	5	Female	19	Asian	Some college	Pacific	Never married	1	...	is probably	is	is probably	is	is probably	is probably	is probably	is	10-19	9

		NAME	id	GENDER	AGE	RACETHN	EDUCCAT5	DIVISION	MARITAL_ACS	HHSIZECAT	...	CI_LABEL_OWNGUN_GSS	CI_LABEL_SEXUALITY	CI_LABEL_HIV_STAT	CI_LABEL_PREG_STAT	CI_LABEL_CC_NUM	CI_LABEL_cc_encoded	CI_LABEL_cc_disclosed	CI_LABEL_NumChronicIllness	AGE_INT	Cluster
0	3413	Karen Campbell	3414	Female	85	White non-Hispanic	HS Grad	East South Central	Widowed	1	...	is probably	is probably	is probably	is probably	is probably	is probably	is probably	is possibly	80-89	0
1	10657	Lori Delacruz	10658	Female	80	White non-Hispanic	HS Grad	South Atlantic	Widowed	1	...	is possibly	is	is probably	is	is probably	is probably	is probably	is probably	80-89	0
2	1669	Shannon Mcdonald	1670	Female	80	White non-Hispanic	Some college	South Atlantic	Widowed	2	...	is probably	is	is possibly	is probably	is probably	is	is probably	is	80-89	0
4	2285	Jennifer Phillips	2286	Female	80	White non-Hispanic	HS Grad	South Atlantic	Divorced	2	...	is probably	is possibly	is probably	is probably	is possibly	is probably	is possibly	is probably	80-89	0
6	7536	Alondra Benavente Sales	7537	Female	42	Hispanic	Some college	South Atlantic	Now married	3+	...	is probably	is probably	is	is probably	is possibly	is	is	is possibly	40-49	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2122	5102	Kathleen Chaney	5103	Female	20	White non-Hispanic	Some college	New England	Never married	3+	...	is	is probably	is probably	is	is	is	is probably	is probably	20-29	13
2123	12570	Heather Anderson	12571	Female	26	White non-Hispanic	College grad	Middle Atlantic	Never married	1	...	is probably	is probably	is	is probably	is possibly	is	is probably	is probably	20-29	13
2125	4852	Lisa Lang	4853	Female	24	White non-Hispanic	HS Grad	West North Central	Never married	3+	...	is probably	is unlikely but might be	is probably	is possibly	is probably	is	is probably	is	20-29	13
2126	13247	Kayla Mcdonald	13248	Female	21	White non-Hispanic	Some college	South Atlantic	Never married	3+	...	is	is probably	is unlikely but might be	is possibly	is	is probably	is probably	is possibly	20-29	13
2127	14580	Cindy Hodges	14581	Female	24	White non-Hispanic	Some college	Pacific	Never married	3+	...	is possibly	is probably	is	is unlikely but might be	is possibly	is	is	is probably	20-29	13

		NAME	id	GENDER	AGE	RACETHN	EDUCCAT5	DIVISION	MARITAL_ACS	HHSIZECAT	...	CI_LABEL_OWNGUN_GSS	CI_LABEL_SEXUALITY	CI_LABEL_HIV_STAT	CI_LABEL_PREG_STAT	CI_LABEL_CC_NUM	CI_LABEL_cc_encoded	CI_LABEL_cc_disclosed	CI_LABEL_NumChronicIllness	AGE_INT
0	3413	Karen Campbell	3414	Female	85	White non-Hispanic	HS Grad	East South Central	Widowed	1	...	is probably	is probably	is probably	is probably	is probably	is probably	is probably	is possibly	80-89
1	10657	Lori Delacruz	10658	Female	80	White non-Hispanic	HS Grad	South Atlantic	Widowed	1	...	is possibly	is	is probably	is	is probably	is probably	is probably	is probably	80-89
2	1669	Shannon Mcdonald	1670	Female	80	White non-Hispanic	Some college	South Atlantic	Widowed	2	...	is probably	is	is possibly	is probably	is probably	is	is probably	is	80-89
3	15793	Albina Cobo Manzano	15794	Female	36	Hispanic	College grad	Pacific	Now married	2	...	is	is probably	is probably	is probably	is	is probably	is probably	is	30-39
4	2285	Jennifer Phillips	2286	Female	80	White non-Hispanic	HS Grad	South Atlantic	Divorced	2	...	is probably	is possibly	is probably	is probably	is possibly	is probably	is possibly	is probably	80-89

		NAME	id	GENDER	AGE	RACETHN	EDUCCAT5	DIVISION	MARITAL_ACS	HHSIZECAT	...	SEXUALITY	HIV_STAT	PREG_STAT	CC_NUM	cc_encoded	cc_disclosed	NumChronicIllness	AGE_INT	certainty
0	4775	Joseph Burton	4776	Male	36	White non-Hispanic	College grad	West South Central	Now married	3+	...	Heterosexual	negative	Not Applicable	2.237470e+15	1	1	2	30-39	50
0	4775	Joseph Burton	4776	Male	36	White non-Hispanic	College grad	West South Central	Now married	3+	...	Heterosexual	negative	Not Applicable	2.237470e+15	1	1	2	30-39	75
0	4775	Joseph Burton	4776	Male	36	White non-Hispanic	College grad	West South Central	Now married	3+	...	Heterosexual	negative	Not Applicable	2.237470e+15	1	1	2	30-39	100
1	3539	Joseph Payne	3540	Male	50	White non-Hispanic	HS Grad	Middle Atlantic	Now married	3+	...	Heterosexual	negative	Not Applicable	5.273270e+15	1	0	1	50-59	50
1	3539	Joseph Payne	3540	Male	50	White non-Hispanic	HS Grad	Middle Atlantic	Now married	3+	...	Heterosexual	negative	Not Applicable	5.273270e+15	1	0	1	50-59	75