09 Feb Situational Attributes of Risk Taking
Situational Attributes for Risk Taking
How the retrieved attributes relate to domain
differences in risk taking was analyzed in an
exploratory fashion because …
The tables can be found in Appendix A of the paper.
Setup
library(data.table) # fantastic and fast data manipulation, see
library(scales) # for percent foramt
library(lsr) # for cohens D
library(coin) # for wilcox_test
library(ltm) # for point biseral correlations
# Helper functions: download the utils folder and maybe change the path path argument
sapply(list.files(path="utils/", pattern="*.R", full.names = TRUE), source, .GlobalEnv) # Throws some code, don't worry about this.
Load the data
Obtain the data from the author
setwd("../4-Data") # Switch to your data directory
d <- fread("res_attributes.coded.csv", key = "i,sit,i.cue,order")
d.dem <- fread("demographics.csv", select=c("i","fem"), key = "i")
d.rsk <- fread("risk.csv")
#####################################################################
# Preprocess
#####################################################################
# We coded cues with ++ xx if the content was 'more' (if the floor was dirty)
# and with -- xxx if the content was 'less' (if the floor was not clean)
# For -- switch the direction to ++
d[, orig.dir := factor(dir, levels=c("Less", "More"))]
d[, dir := ifelse(grepl("[-]{2}",cue) & dir=="Less", "More",
ifelse(grepl("[-]{2}",cue) & dir=="More", "Less",
dir))]
# Remove ++ and -- and assume all are coded with ++
d[, cue := sub("^ ","",sub("[-]{2}|[+]{2}","",cue))]
cat("\nPreprocessed coded attributes data.\n")
Interrater reliability
Our data contained 1,598 attributes which two
raters classified. The average interrater reliability,
pooled across domains, was …
library(openxlsx) # To read xlxs aspell_package_C_files
# All domains
domains <- c("bgc","wgc","sta","env","fse","fac","par","kin","mat","mre")
IRR <- sapply (domains, function(domain) {
# Load data
coder1 = fread(paste0("categorized_attributes/",domain,"_JJ.csv")) #coder 1
coder2 = fread(paste0("categorized_attributes/",domain,"_LS.csv")) #coder 2
# Generate a data set with tht risk-categories of the responses
# The data also contains a code for the direction ('less'/'more'/'equally') risk-taking, we exclude those
# Also exclude codes for being 'certain' to take/avoid the risk
cat1 <- coder1[!grepl("certainty|less|more|equally",tolower(nam))]
cat2 <- coder2[!grepl("certainty|less|more|equally",tolower(nam))]
# Transform n > 1 categories for one response into compound categories
setkey(cat1,doc,cat)
cat1[, cat := paste(cat, collapse="." ), by=doc]
cat1[, nam := paste(nam, collapse=", "), by=doc]
cat1[, txt := paste(txt, collapse=", "), by=doc]
cat1 <- unique(cat1)
setkey(cat2,doc,cat)
cat2[, cat := paste(cat, collapse="." ), by=doc]
cat2[, nam := paste(nam, collapse=", "), by=doc]
cat2[, txt := paste(txt, collapse=", "), by=doc]
cat2 <- unique(cat2)
# Merge the coder's (coder1,coder2) codings by document
# For differing codes, both codes will be kept. Per code, a row will be added, holding the code of the one coder and a NA value for the other coder
d <- merge(cat1,cat2, by="doc", all=T, suffixes=c("1","2"))
d <- d[, lapply(.SD, function(x) ifelse(is.na(x),"",x))] # Clean
# Remove rows holding training documents because MethodsListSelect
# Would bias the inter-rater-reliability upwards
trained = read.xlsx("categorized_attributes/rater.training.results.all.domains.xlsx", sheet=domain)$docNam # load text used for the training
trained = unique(trained[!grepl("TRAINING",trained)])
setkey(d, doc)
# Remove training data
d = d[!trained]
# Inter rater reliability coefficients
gac = round(d[, gwet.ac1.raw(as.matrix(cbind(cat1,cat2)))][3], 2)
# Percent Agreement
pa = round(d[, mean(cat1==cat2)],2)
return(data.frame(gwets.ac=gac, percent.agree=pa))
})
IRR <- matrix(unlist(IRR), nrow = 2, dimnames = dimnames(IRR)) # Make matrix
colnames(IRR) <- expanddomains(colnames(IRR)) # More descriptive names
t(IRR) # View result
summary(IRR["gwets.ac", ]) # Summarize inter-rater reliability
Frequency of risk-taking attributes.
Because the instructions called for situational
rather than personal attributes, personal attributes
like âIf I were less a man I would …â
were excluded (8.5 % of all attributes); and
attributes for which participants reported to be
âequally likelyâ to engage in the risk were excluded
(0.13%) …
# Remove "equally likely"
percent(d[dir=="Equally", .N]/d[,.N]) # 0.13%
d <- d[!dir=="Equally"] # Remove
cat("Removed 'equally-likely' attributes.\n")
# Remove "self-reference" (= personal attributes)
percent(d[tolower(cue)=="self-reference", .N]/d[,.N]) # 8.42 %
i.cue_personal <- d[tolower(cue)=="self-reference", i.cue]
d <- d[!i.cue %in% i.cue_personal]
cat("Removed 'self-reference' attributes.\n")
# Frequency of risk-taking attributes
d[, .N, by=c("i","sit")][, list(MW=mean(N), MED=median(N), SD=sd(N), min=min(N), max=max(N))][, lapply(.SD, round, digits = 2)]
# Different frequencies across domains
anova <- aov(N ~ dom, data=d[, .N, by=list(i,dom,sit)])
summary(anova)
etaSquared(anova)
Table 1
Most Frequent Attributes Across 10 Evolutionary Risk Domains
The following lines of R-code generate Table 1 of the paper
# Table 1: Frequency of risk-taking attributes
d[, ntotal := length(unique(i)), by=dom] # total N per domain
d[, ncue := length(unique(i)), by=list(dom, cue)] # total N of cues per domain
d.most.cues <- d[, .(ncue = ncue, Rel = ncue/ntotal), by=list(dom, cue)] # Relative frequ of cues per domain
d.most.cues <- unique(d.most.cues) # delete duplicate rows
d.most.cues <- d.most.cues[, .SD[order(Rel, decreasing = T)]] #order
other_rows <- d.most.cues[, grepl("^Other$|^Other or wrong sex$",cue)]
d.most.cues <- d.most.cues[!other_rows, head(.SD,3), by = dom] # top 3
d.most.dir <- d[, .(
Less = length(unique(i[dir=="Less"])),
More = length(unique(i[dir=="More"]))),
by = list(dom,cue)]
d.most.cues <- merge(d.most.dir, d.most.cues, by =c("dom","cue"), all.y = TRUE)
d.most.cues[, Dir := ifelse(Less>More, "Less", ifelse(More>Less, "More", "?"))]
# Note that the names of the categories are slightly changed in the manuscript to facilitate better understanding
d.most.cues
# # Cosmetics for table in MS
# d.most.cues[, Rel := percent(round(Rel,2))]
# d.most.cues <- d.most.cues[order(dom)]
# d.most.cues[, Domain := factor(dom, labels=Vectorize(expanddomains)(levels(dom)))]
# d.most.cues <- d.most.cues[, .SD[order(Rel, decreasing = TRUE)], by = Domain]
# d.most.cues[, Domain2 := c("",levels(Domain)[Domain],""), by=Domain]
# d.most.cues[, Domain := Domain2]
# d.most.cues[, c("Domain2","dom","Less","More") := NULL]
# setcolorder(d.most.cues, c("Domain","cue", "Dir","ncue", "Rel"))
# d.most.cues
# write.table(
# htmlTable(d.most.cues,
# header = c("Domain", "Cue", "Dir", "N", "Rel"),
# rnames = FALSE),
# file = "Table.html", row.names = F, col.names =F)
# Note that in the publication some names have chaned slightly to facilitate understanding the categories
Tally: Relative Frequency of Positive Attributes Increases Risk Seeking
Tally suggests that increased riskiness is associated
with recalling more positive than negative
attributes ignoring any semantic differences
between attributes. Overall, about …
#####################################################################
# Tally: Relative frequency of positive attributes increases risk seeking
#####################################################################
# Tally data
d.tally <- as.data.table(dcast(sit + dom + i ~ orig.dir, data = d[, .N, by=list(i,sit,orig.dir,dom)], fill = 0, value.var = "N", strings.as.factors = FALSE)) # Number of more/less attributes, note: we use the original direction
d.tally[, total := Less + More] # Total number of cues
d.tally[, percent_more := More/total]
# Merge with demographics data
setkey(d.tally,i)
d.tally <- d.dem[d.tally]
# How many positive and negative attributes did men/women report?
d.tally[, .(Median_more = median(More),
Median_total = median(total)), by = fem]
d.tally.perPerson <- d.tally[, .(More = median(percent_more)), by = list(fem,i)]
# Gender differences reg. the rel. frequencies of positive attributes?
wilcox_test(More ~ as.factor(fem), data=d.tally.perPerson)
# Compute SRF and correlate it with risk data for subset of data
# Make sure you have sourced the functions in the utils folder at the beginning
SRF <- d.tally[, .(dom = dom, sit = sit, fem = fem, SRF = zstandardize(percent_more)), by = i]
# Unmique person x situation combinations
unique(d[, .(i, sit)])
# Merge SRF and risk domains
d.srf.rsk <- merge(SRF, d.rsk, by = c("i","sit","dom","fem"), all.x=T)
# Correlation per situation
d.srf.rsk[, round(cor(SRF,lik),2)]
# Aggregate within domains and compute correlation per domains
d.srf.rsk[, .(mwSRF=mean(SRF), mwlik=mean(lik)), by = dom][, round(cor(mwSRF,mwlik),2)]
Take the First: Retrieving a Positive Attribute as First Attribute Increases Risk Seeking
Take the first predicts that in the domains
with higher risk propensities more respondents
retrieve positive attributes first (attributes pointing
toward more risk). To explore this, we conducted
two analyses. …
#####################################################################
# LEX: Does the first cue predict risk taking?
#####################################################################
# Copy of data: data set containing only the first cue
d.lex <- copy(d)
d.lex <- d.lex[order==1] # Note: This excludes people who mentioned a
# "self-reference" attribute in
# the beginning
# Were the first attributes positive?
d.lex[, .(
N = sum(orig.dir=="More"), # we use the original direction
Total = .N,
percent = percent(prop.table(table(orig.dir))["More"]))]
# Gender difference in first-positive cues
setkey(d.dem,i)
setkey(d.lex,i)
d.lex <- d.dem[d.lex]
d.lex[, is.more := as.numeric(dir=="More")]
d.lex[, .(
MW = round(mean(is.more),2),
SD = round(sd(is.more),2)), by = fem ]
# Test of distribution of % positive attributes equal among men/women
d.lex.perPerson <- d.lex[, .(fem=fem, dom=dom, perc.more = mean(is.more)), by = i]
wilcox_test(perc.more ~ factor(fem), data=d.lex.perPerson)
# First positive attributes and risk-taking
d.lex.rsk <- merge(d.lex, d.rsk, by = c("i","sit","dom","fem"), all.x=T)
# Correlation per situation
d.lex.rsk[, round(biserial.cor(lik,is.more),2)]
d.lex.rsk[, round(biserial.cor(lik,is.more),2), by = fem]
# Correlation per domains
d.lex.rsk[, .(mw.is.more=mean(is.more), mwlik=mean(lik)), by = dom][, round(cor(mw.is.more,mwlik),2)]
# Domain status: outlier
d.lex[dom=="sta", .(MW=percent(mean(is.more)), N=sum(is.more), of=.N)]
Most Relevant: Frequency of Attributes Determines Risk Seeking
According to most relevant, the frequency of
attributes in the real world influences domainspecific
risk propensities. The proportion of …
Important: you have to run the chunc of code for Table 1 (above) before this, to generate the d.most.cues object in the r workspace
#####################################################################
# Most Relevant
#####################################################################
# How many people mention a most-mentioned cue
round(d.most.cues[, max(Rel), by = dom][, range(V1)],2)
# If this throws an error, you maybe have not run the code to generate table 1 (which you can find above). This code generates the d.most.cues object
d.most.cues[, max(ncue), by = dom][, range(V1)]
#####################################################################
# Comparison to google trends frequencies
#####################################################################
d.trends <- fread("google_trends/googletrends.csv", select = c("most.mentioned.cue", "dom", "mean.gtrends.hits"))
# Merge with main data (per domain x cue)
d.trends[, dom.cue.key := paste(dom, most.mentioned.cue)]
d[, dom.cue.key := paste(dom, cue)]
d.most <- merge(d[, c("i","dom","dom.cue.key","cue")], d.trends[, c("dom.cue.key","mean.gtrends.hits", "most.mentioned.cue")], by = "dom.cue.key", all = TRUE)
# Aggregate to contain only 1 row per domain x individual
d.most <- d.most[, .(
mean.gtrends.hits = mean(mean.gtrends.hits, na.rm=TRUE),
cue = unique(na.omit(most.mentioned.cue))),
by = list(dom,i)]
# Merge with original direction of the cue (by domain x person x cue)
d.most <- merge(d.most, d[, c("i","dom","cue", "orig.dir", "order")], by = c("dom","i","cue"), all.x = TRUE, all.y = FALSE)
tmp1 <- d.most[, .SD[which.min(order),], by = list(dom,i,cue)]
tmp2 <- d.most[is.na(order)]
d.most <- rbind(tmp1,tmp2)
# Merge with the main attribute data file
setkey(d.dem,i)
setkey(d.most,i)
d.most <- d.dem[d.most]
# Does number of people retrieving the cue reflect gtrends?
setkey(d.most.cues, dom)
setkey(d.trends, dom)
cor.n.gtrends.mostrelevant.cue <- d.most.cues[, .(Rel = Rel[which.max(Rel)]), by = dom][d.trends][, cor(Rel, mean.gtrends.hits)]
round(cor.n.gtrends.mostrelevant.cue, 2)
#####################################################################
# Run this for the graphical comparsion
#####################################################################
# # Binary indicator
# d.most.cues[, mentions.most := as.numeric(!is.na(cue))]
# # First positive attributes merged with risk data
# d.most.rsk <- merge(d.most.cues,
# d.rsk[, .(lik = mean(lik)), by = list(i,dom,fem)],
# by = c("i","dom","fem"),
# all.x = TRUE)
# Reset working directory to the dir. where your code is stored
setwd("../5-Code")
Here is some additional code that may come in handy when working with the data (not run by default)
# ########################################################################
# # Code to retrieve text examples of specific categories of attributes
##########################################################################
# d.raw <- fread("../4-Data/rawcues.csv")
# d.raw[, i.cue := paste(sit, "--", i, "--", cueNum, ".txt", sep = "")]
# d.raw <- merge(d, d.raw, by = "i.cue")
# category <- "Initiation by opponent"
# d.raw[cue.x==category, cue.y]
# d.raw[.x=="Evidence for hygene of the source"]
################################################################################
# CHECKS (not in the paper)
################################################################################
# # Not all respondents reported attributes for all situations
# # Does the risk propensity of the subset equal the full one?
# d.rsk <- fread("../4-Data/risk.csv")
# d.rsk[, dom := factor(dom, levels = rskDom)]
# d.rsk.subset <- merge(unique(d[, c("i","sit")]), d.rsk, by = c("i","sit"), all.x = TRUE)
# # Risk data
# rsk <- d.rsk[, .(mw.lik=mean(lik)), by = list(dom,fem)]
# rsk.subset <- d.rsk.subset[, .(mw.lik.subset=mean(lik)), by = list(dom,fem)]
# check <- merge(rsk, rsk.subset, by = c("dom","fem"))
# check[, cor(mw.lik, mw.lik.subset), by = fem][, lapply(V1, round, digits = 2), by = fem]
# # Number of people reporting only 1 attribute
# d[, .N, by = list(sit,i)][, all(N==1), by = i][isTRUE(V1)]
# # Check if people identical
# check_n_subset <- d.rsk.subset[, unique(i), keyby = dom][order(V1)]
# check_n <- d[, unique(i), keyby = dom][order(V1)]
# all(check_n_subset == check_n) # TRUE