Contact
Situational Attributes of Risk Taking
Data scientist in Basel
1403
post-template-default,single,single-post,postid-1403,single-format-standard,bridge-core-3.1.8,qode-page-transition-enabled,ajax_fade,page_not_loaded,,qode_grid_1300,side_area_uncovered_from_content,overlapping_content,qode-content-sidebar-responsive,qode-theme-ver-30.5,qode-theme-bridge,disabled_footer_top,qode_header_in_grid,wpb-js-composer js-comp-ver-7.6,vc_responsive

Situational Attributes of Risk Taking

Situational Attributes of Risk Taking

Situational Attributes for Risk Taking

How the retrieved attributes relate to domain
differences in risk taking was analyzed in an
exploratory fashion because …

The tables can be found in Appendix A of the paper.

Setup

library(data.table) # fantastic and fast data manipulation, see 
library(scales) # for percent foramt
library(lsr) # for cohens D
library(coin) # for wilcox_test
library(ltm) # for point biseral correlations
# Helper functions: download the utils folder and maybe change the path path argument
sapply(list.files(path="utils/", pattern="*.R", full.names = TRUE), source, .GlobalEnv) # Throws some code, don't worry about this.

Load the data

Obtain the data from the author

setwd("../4-Data") # Switch to your data directory
d     <- fread("res_attributes.coded.csv", key = "i,sit,i.cue,order")
d.dem <- fread("demographics.csv", select=c("i","fem"), key = "i")
d.rsk <- fread("risk.csv")


#####################################################################
# Preprocess
#####################################################################
# We coded cues with ++ xx if the content was 'more' (if the floor was dirty)
# and with -- xxx if the content was 'less' (if the floor was not clean)
# For -- switch the direction to ++
d[, orig.dir := factor(dir, levels=c("Less", "More"))]
d[, dir := ifelse(grepl("[-]{2}",cue) & dir=="Less", "More",
            ifelse(grepl("[-]{2}",cue) & dir=="More", "Less",
            dir))]
# Remove ++ and -- and assume all are coded with ++
d[, cue := sub("^ ","",sub("[-]{2}|[+]{2}","",cue))]
cat("\nPreprocessed coded attributes data.\n")

Interrater reliability

Our data contained 1,598 attributes which two
raters classified. The average interrater reliability,
pooled across domains, was …

library(openxlsx)                     # To read xlxs aspell_package_C_files

# All domains
domains <- c("bgc","wgc","sta","env","fse","fac","par","kin","mat","mre")
IRR <- sapply (domains, function(domain) {  

    # Load data
    coder1 = fread(paste0("categorized_attributes/",domain,"_JJ.csv")) #coder 1
    coder2 = fread(paste0("categorized_attributes/",domain,"_LS.csv")) #coder 2

    #  Generate a data set with tht risk-categories of the responses
    # The data also contains a code for the direction ('less'/'more'/'equally') risk-taking, we exclude those
    # Also exclude codes for being 'certain' to take/avoid the risk
    cat1 <- coder1[!grepl("certainty|less|more|equally",tolower(nam))] 
    cat2 <- coder2[!grepl("certainty|less|more|equally",tolower(nam))]

    # Transform n > 1 categories for one response into compound categories
    setkey(cat1,doc,cat)
    cat1[, cat := paste(cat, collapse="." ), by=doc]
    cat1[, nam := paste(nam, collapse=", "), by=doc]
    cat1[, txt := paste(txt, collapse=", "), by=doc]
    cat1 <- unique(cat1)
    setkey(cat2,doc,cat)
    cat2[, cat := paste(cat, collapse="." ), by=doc]
    cat2[, nam := paste(nam, collapse=", "), by=doc]
    cat2[, txt := paste(txt, collapse=", "), by=doc]
    cat2 <- unique(cat2)

    # Merge the coder's (coder1,coder2) codings by document
    # For differing codes, both codes will be kept. Per code, a row will be added, holding the code of the one coder and a NA value for the other coder
    d <- merge(cat1,cat2, by="doc", all=T, suffixes=c("1","2"))
    d <- d[, lapply(.SD, function(x) ifelse(is.na(x),"",x))] # Clean

    # Remove rows holding training documents because MethodsListSelect
    # Would bias the inter-rater-reliability upwards
    trained = read.xlsx("categorized_attributes/rater.training.results.all.domains.xlsx", sheet=domain)$docNam # load text used for the training
    trained = unique(trained[!grepl("TRAINING",trained)])
    setkey(d, doc)
    # Remove training data
    d = d[!trained]


    # Inter rater reliability coefficients
    gac = round(d[, gwet.ac1.raw(as.matrix(cbind(cat1,cat2)))][3], 2)
    # Percent Agreement
    pa = round(d[, mean(cat1==cat2)],2)

    return(data.frame(gwets.ac=gac, percent.agree=pa))

    })


IRR <- matrix(unlist(IRR), nrow = 2, dimnames = dimnames(IRR)) # Make matrix
colnames(IRR) <- expanddomains(colnames(IRR)) # More descriptive names

t(IRR) # View result
summary(IRR["gwets.ac", ]) # Summarize inter-rater reliability

Frequency of risk-taking attributes.

Because the instructions called for situational
rather than personal attributes, personal attributes
like âIf I were less a man I would …â
were excluded (8.5 % of all attributes); and
attributes for which participants reported to be
âequally likely❠to engage in the risk were excluded
(0.13%) …

# Remove "equally likely"
percent(d[dir=="Equally", .N]/d[,.N]) # 0.13%
d <- d[!dir=="Equally"] # Remove
cat("Removed 'equally-likely' attributes.\n")

# Remove "self-reference" (= personal attributes)
percent(d[tolower(cue)=="self-reference", .N]/d[,.N]) # 8.42 %
i.cue_personal <- d[tolower(cue)=="self-reference", i.cue]
d <- d[!i.cue %in% i.cue_personal]
cat("Removed 'self-reference' attributes.\n")


# Frequency of risk-taking attributes
d[, .N, by=c("i","sit")][, list(MW=mean(N), MED=median(N), SD=sd(N), min=min(N), max=max(N))][, lapply(.SD, round, digits = 2)]

# Different frequencies across domains
anova <- aov(N ~ dom, data=d[, .N, by=list(i,dom,sit)])
summary(anova)
etaSquared(anova)

Table 1

Most Frequent Attributes Across 10 Evolutionary Risk Domains

The following lines of R-code generate Table 1 of the paper

# Table 1: Frequency of risk-taking attributes
d[, ntotal := length(unique(i)), by=dom] # total N per domain
d[, ncue := length(unique(i)), by=list(dom, cue)] # total N of cues per domain
d.most.cues <- d[, .(ncue = ncue, Rel = ncue/ntotal), by=list(dom, cue)] # Relative frequ of cues per domain
d.most.cues <- unique(d.most.cues) # delete duplicate rows
d.most.cues <- d.most.cues[, .SD[order(Rel, decreasing = T)]] #order
other_rows <- d.most.cues[, grepl("^Other$|^Other or wrong sex$",cue)]
d.most.cues <- d.most.cues[!other_rows, head(.SD,3), by = dom] # top 3
d.most.dir <- d[, .(
    Less = length(unique(i[dir=="Less"])),
    More = length(unique(i[dir=="More"]))),
    by = list(dom,cue)]
d.most.cues <- merge(d.most.dir, d.most.cues, by =c("dom","cue"), all.y = TRUE)
d.most.cues[, Dir := ifelse(Less>More, "Less", ifelse(More>Less, "More", "?"))]
# Note that the names of the categories are slightly changed in the manuscript to facilitate better understanding
d.most.cues


# # Cosmetics for table in MS
# d.most.cues[, Rel := percent(round(Rel,2))]
# d.most.cues <- d.most.cues[order(dom)]
# d.most.cues[, Domain := factor(dom, labels=Vectorize(expanddomains)(levels(dom)))]
# d.most.cues <- d.most.cues[, .SD[order(Rel, decreasing = TRUE)], by = Domain]
# d.most.cues[, Domain2 := c("",levels(Domain)[Domain],""), by=Domain]
# d.most.cues[, Domain := Domain2]
# d.most.cues[, c("Domain2","dom","Less","More") := NULL]
# setcolorder(d.most.cues, c("Domain","cue", "Dir","ncue", "Rel"))
# d.most.cues
# write.table(
#     htmlTable(d.most.cues, 
#         header = c("Domain", "Cue", "Dir", "N", "Rel"),
#         rnames = FALSE),
#         file = "Table.html", row.names = F, col.names =F)
# Note that in the publication some names have chaned slightly to facilitate understanding the categories

Tally: Relative Frequency of Positive Attributes Increases Risk Seeking

Tally suggests that increased riskiness is associated
with recalling more positive than negative
attributes ignoring any semantic differences
between attributes. Overall, about …

#####################################################################
# Tally: Relative frequency of positive attributes increases risk seeking
#####################################################################
# Tally data
d.tally <- as.data.table(dcast(sit + dom + i ~ orig.dir, data = d[, .N, by=list(i,sit,orig.dir,dom)], fill = 0, value.var = "N", strings.as.factors = FALSE)) # Number of more/less attributes, note: we use the original direction
d.tally[, total := Less + More] # Total number of cues
d.tally[, percent_more := More/total]
# Merge with demographics data
setkey(d.tally,i)
d.tally <- d.dem[d.tally]

# How many positive and negative attributes did men/women report?
d.tally[, .(Median_more = median(More),
            Median_total = median(total)), by = fem]
d.tally.perPerson <- d.tally[, .(More = median(percent_more)), by = list(fem,i)]

# Gender differences reg. the rel. frequencies of positive attributes?
wilcox_test(More ~ as.factor(fem), data=d.tally.perPerson)

# Compute SRF and correlate it with risk data for subset of data
# Make sure you have sourced the functions in the utils folder at the beginning
SRF <- d.tally[, .(dom = dom, sit = sit, fem = fem, SRF = zstandardize(percent_more)), by = i]

# Unmique person x situation combinations
unique(d[, .(i, sit)])

# Merge SRF and risk domains
d.srf.rsk <- merge(SRF, d.rsk, by = c("i","sit","dom","fem"), all.x=T)
# Correlation per situation
d.srf.rsk[, round(cor(SRF,lik),2)]
# Aggregate within domains and compute correlation per domains
d.srf.rsk[, .(mwSRF=mean(SRF), mwlik=mean(lik)), by = dom][, round(cor(mwSRF,mwlik),2)]

Take the First: Retrieving a Positive Attribute as First Attribute Increases Risk Seeking

Take the first predicts that in the domains
with higher risk propensities more respondents
retrieve positive attributes first (attributes pointing
toward more risk). To explore this, we conducted
two analyses. …

#####################################################################
# LEX: Does the first cue predict risk taking?
#####################################################################
# Copy of data: data set containing only the first cue
d.lex <- copy(d)
d.lex <- d.lex[order==1] # Note: This excludes people who mentioned a
                     # "self-reference" attribute in
                     # the beginning

# Were the first attributes positive?
d.lex[, .(
    N = sum(orig.dir=="More"), # we use the original direction
    Total = .N,
    percent = percent(prop.table(table(orig.dir))["More"]))]

# Gender difference in first-positive cues
setkey(d.dem,i)
setkey(d.lex,i)
d.lex <- d.dem[d.lex]
d.lex[, is.more := as.numeric(dir=="More")]
d.lex[, .(
    MW = round(mean(is.more),2),
    SD = round(sd(is.more),2)), by = fem ]
# Test of distribution of % positive attributes equal among men/women
d.lex.perPerson <- d.lex[, .(fem=fem, dom=dom, perc.more = mean(is.more)), by = i]
wilcox_test(perc.more ~ factor(fem), data=d.lex.perPerson)

# First positive attributes and risk-taking
d.lex.rsk <- merge(d.lex, d.rsk, by = c("i","sit","dom","fem"), all.x=T)

# Correlation per situation
d.lex.rsk[, round(biserial.cor(lik,is.more),2)]
d.lex.rsk[, round(biserial.cor(lik,is.more),2), by = fem]
# Correlation per domains
d.lex.rsk[, .(mw.is.more=mean(is.more), mwlik=mean(lik)), by = dom][, round(cor(mw.is.more,mwlik),2)]
# Domain status: outlier
d.lex[dom=="sta", .(MW=percent(mean(is.more)), N=sum(is.more), of=.N)]

Most Relevant: Frequency of Attributes Determines Risk Seeking

According to most relevant, the frequency of
attributes in the real world influences domainspecific
risk propensities. The proportion of …

Important: you have to run the chunc of code for Table 1 (above) before this, to generate the d.most.cues object in the r workspace

#####################################################################
# Most Relevant
#####################################################################

# How many people mention a most-mentioned cue
round(d.most.cues[, max(Rel), by = dom][, range(V1)],2)
# If this throws an error, you maybe have not run the code to generate table 1 (which you can find above). This code generates the d.most.cues object
d.most.cues[, max(ncue), by = dom][, range(V1)]


#####################################################################
# Comparison to google trends frequencies
#####################################################################
d.trends <- fread("google_trends/googletrends.csv", select = c("most.mentioned.cue", "dom", "mean.gtrends.hits"))
# Merge with main data (per domain x cue)
d.trends[, dom.cue.key := paste(dom, most.mentioned.cue)]
d[, dom.cue.key := paste(dom, cue)]
d.most <- merge(d[, c("i","dom","dom.cue.key","cue")], d.trends[, c("dom.cue.key","mean.gtrends.hits", "most.mentioned.cue")], by = "dom.cue.key", all = TRUE)
# Aggregate to contain only 1 row per domain x individual
d.most <- d.most[, .(
    mean.gtrends.hits = mean(mean.gtrends.hits, na.rm=TRUE),
    cue = unique(na.omit(most.mentioned.cue))),
     by = list(dom,i)]
# Merge with original direction of the cue (by domain x person x cue)
d.most <- merge(d.most, d[, c("i","dom","cue", "orig.dir", "order")], by = c("dom","i","cue"), all.x = TRUE, all.y = FALSE)
tmp1 <- d.most[, .SD[which.min(order),], by = list(dom,i,cue)]
tmp2 <- d.most[is.na(order)]
d.most <- rbind(tmp1,tmp2)
# Merge with the main attribute data file 
setkey(d.dem,i)
setkey(d.most,i)
d.most <- d.dem[d.most]
# Does number of people retrieving the cue reflect gtrends?
setkey(d.most.cues, dom)
setkey(d.trends, dom)
cor.n.gtrends.mostrelevant.cue <- d.most.cues[, .(Rel = Rel[which.max(Rel)]), by = dom][d.trends][, cor(Rel, mean.gtrends.hits)]
round(cor.n.gtrends.mostrelevant.cue, 2)


#####################################################################
# Run this for the graphical comparsion
#####################################################################
# # Binary indicator
# d.most.cues[, mentions.most := as.numeric(!is.na(cue))]

# # First positive attributes merged with risk data
# d.most.rsk <- merge(d.most.cues,
#     d.rsk[, .(lik = mean(lik)), by = list(i,dom,fem)],
#     by = c("i","dom","fem"),
#     all.x = TRUE)

# Reset working directory to the dir. where your code is stored
setwd("../5-Code")

Here is some additional code that may come in handy when working with the data (not run by default)

# ########################################################################
# # Code to retrieve text examples of specific categories of attributes
##########################################################################
# d.raw <- fread("../4-Data/rawcues.csv")
# d.raw[, i.cue := paste(sit, "--", i, "--", cueNum, ".txt", sep = "")]
# d.raw <- merge(d, d.raw, by = "i.cue")
# category <- "Initiation by opponent"
# d.raw[cue.x==category, cue.y]
# d.raw[.x=="Evidence for hygene of the source"]


################################################################################
# CHECKS (not in the paper)
################################################################################
# # Not all respondents reported attributes for all situations
# # Does the risk propensity of the subset equal the full one?
# d.rsk <- fread("../4-Data/risk.csv")
# d.rsk[, dom := factor(dom, levels = rskDom)]
# d.rsk.subset <- merge(unique(d[, c("i","sit")]), d.rsk, by = c("i","sit"), all.x = TRUE)
# # Risk data
# rsk <- d.rsk[, .(mw.lik=mean(lik)), by = list(dom,fem)]
# rsk.subset <- d.rsk.subset[, .(mw.lik.subset=mean(lik)), by = list(dom,fem)]
# check <- merge(rsk, rsk.subset, by = c("dom","fem"))
# check[, cor(mw.lik, mw.lik.subset), by = fem][, lapply(V1, round, digits = 2), by = fem]
# # Number of people reporting only 1 attribute
# d[, .N, by = list(sit,i)][, all(N==1), by = i][isTRUE(V1)]

# # Check if people identical
# check_n_subset <- d.rsk.subset[, unique(i), keyby = dom][order(V1)]
# check_n <- d[, unique(i), keyby = dom][order(V1)]
# all(check_n_subset == check_n) # TRUE