# alphaicon/code/alphaicon_paper/5_algorithm_evaluation.r
#
# 413 lines
# 23 KiB
# R
# Raw Permalink Normal View History
#
# 2021-09-16 08:06:49 +01:00
# This code evaluates various algorithms
# for ultimate owner identification
library(data.table)
library(stringi)
library(stringr)
library(igraph)
# Declare working directory beforehand in an environment variable
# ALPHAICON_PATH = "path_to_your_folder"
# with the aid of usethis::edit_r_environ()
# Restart R session for the changes to take effect
# Work from the project root stored in the ALPHAICON_PATH environment variable
setwd(Sys.getenv("ALPHAICON_PATH"))

############
# Load and prepare the data

# Active participants snapshot of PSC
# (prepared by data_preparation/uk/2_psc_snapshot_to_participants_panel.r)
psc <- fread(
  "output/uk/uk_organisations_participants_2021_long_2aug21.csv",
  integer64 = "character",
  na.strings = "",
  encoding = "UTF-8"
)

# DPI of each participant after 10000 iterations
# (computed by alphaicon_paper/2_compute_npi_dpi.r)
holders_dpi <- fread(
  "output/uk/npi_dpi/10000iter/uk_organisations_participants_2021_long_7sep21_dpi_10000iter.csv",
  integer64 = "character",
  na.strings = "",
  encoding = "UTF-8"
)
# Use the same column names as the other holder tables; the index itself is
# stored under the generic name "share"
setnames(
  holders_dpi,
  old = c("entity", "participant", "dpi"),
  new = c("company_number", "participant_id", "share")
)
# The times_pivotal column is not used below
holders_dpi[, times_pivotal := NULL]

# NPI of each participant after 10000 iterations
# (computed by alphaicon_paper/2_compute_npi_dpi.r)
holders_npi <- fread(
  "output/uk/npi_dpi/10000iter/uk_organisations_participants_2021_long_7sep21_npi_10000iter.csv",
  integer64 = "character",
  na.strings = "",
  encoding = "UTF-8"
)
setnames(
  holders_npi,
  old = c("entity", "participant", "npi"),
  new = c("company_number", "participant_id", "share")
)
holders_npi[, times_pivotal := NULL]
# Transitive ownership tables for every alpha
# (prepared by alphaicon_paper/1_compute_alphaicon.ipynb).
# Alphas are kept as strings because they are pasted into file and object names.
alphas <- c(
  "0.0", "0.1", "0.2", "0.3", "0.4", "0.5",
  "0.6", "0.7", "0.8", "0.9", "0.999"
)
for (a in alphas) {
  # Transitive ownership on equity shares -> holders_transitive_alpha<a>
  assign(
    paste0("holders_transitive_alpha", a),
    fread(
      paste0("output/uk/transitive/uk_organisations_transitive_ownership_alpha", a, "_2021_long_2aug21.csv"),
      integer64 = "character",
      na.strings = "",
      encoding = "UTF-8",
      colClasses = c("character", "character", "numeric")
    )
  )
  # Transitive ownership on DPI weights -> holders_transitivedpi_alpha<a>
  assign(
    paste0("holders_transitivedpi_alpha", a),
    fread(
      paste0("output/uk/transitive/uk_organisations_transitive_ownership_alpha", a, "_2021_long_7sep21_dpi_10000iter.csv"),
      integer64 = "character",
      na.strings = "",
      encoding = "UTF-8",
      colClasses = c("character", "character", "numeric")
    )
  )
}
# Graph membership of every node
# (prepared by alphaicon_paper/1_compute_alphaicon.ipynb)
graph_membership <- fread(
  "output/uk/uk_organisations_participation_graph_core_periphery_membership_6aug21.csv",
  integer64 = "character",
  na.strings = "",
  encoding = "UTF-8",
  header = TRUE,
  colClasses = c("numeric", "character", "factor")
)
# Drop the V1 column and give the identifier column the name used
# by the holder tables
graph_membership[, V1 := NULL]
setnames(graph_membership, old = "company_number/id", new = "participant_id")

# Parent-subsidiary pairs used to evaluate the algorithms
# (prepared by data_preparation/uk/3_prepare_affiliated_entities_evaluation_data.r)
affiliated_entities <- fread(
  "data/uk/uk_parent_subsidiary_mapping_2020_2021_sec_filers_exhibit21.csv",
  integer64 = "character",
  na.strings = "",
  encoding = "UTF-8"
)
# The CorpWatch entity identifiers are not needed
affiliated_entities[, c("subsidiary_cw_id", "parent_cw_id") := NULL]
# Shortest paths from parent to subsidiary on the PSC graph

# Directed graph participant -> company built from PSC rows that have an
# equity share and are not self-references
psc_graph <- graph_from_data_frame(
  d = psc[
    !is.na(equity_share) & participant_id != company_number,
    c("participant_id", "company_number", "equity_share"),
    with = FALSE
  ],
  directed = TRUE
)

# Distances from the gold-standard parents (rows) to the gold-standard
# subsidiaries (columns), following edges in the outgoing direction
vertex_names <- V(psc_graph)$name
parent_vertices <- which(vertex_names %in% affiliated_entities$participant_id)
subsidiary_vertices <- which(vertex_names %in% affiliated_entities$company_number)
affiliated_distance_mat <- distances(
  psc_graph,
  v = parent_vertices,
  to = subsidiary_vertices,
  mode = "out"
)

# Reshape the matrix to long format: one row per (parent, subsidiary) pair
affiliated_distance <- as.data.table(as.table(affiliated_distance_mat))
setnames(affiliated_distance, c("participant_id", "company_number", "path_length"))

# Pairs that cannot be reached have a non-finite distance; drop them
affiliated_distance <- affiliated_distance[is.finite(path_length)]

# Attach the path length to every gold-standard pair
affiliated_entities <- merge(
  affiliated_entities,
  affiliated_distance,
  by = c("company_number", "participant_id"),
  all.x = TRUE,
  all.y = FALSE,
  sort = FALSE
)
#affiliated_entities[, .N, by = "path_length"]
#nrow(affiliated_entities[!is.na(path_length)] # 1007

# Keep only the pairs where the parent reaches the subsidiary on the PSC graph
affiliated_entities <- affiliated_entities[!is.na(path_length)]
# Names of all holder tables loaded above
holder_objects <- paste0(
  "holders_",
  c(
    "dpi",
    "npi",
    paste0("transitive_alpha", alphas),
    paste0("transitivedpi_alpha", alphas)
  )
)

############
# Subset and rank the holder tables

# Every identifier that appears in the evaluation data, on either side of a pair
affiliated_ids <- union(
  affiliated_entities$company_number,
  affiliated_entities$participant_id
)

for (obj in holder_objects) {
  # Keep only rows touching an evaluation entity (speeds up what follows)
  # and attach the graph type of each participant
  assign(
    obj,
    merge(
      get(obj)[company_number %in% affiliated_ids | participant_id %in% affiliated_ids],
      graph_membership,
      by = "participant_id",
      all.x = TRUE,
      all.y = FALSE,
      sort = FALSE
    )
  )
  # Largest share first within each company (reorders the table in place)
  setorderv(get(obj), c("company_number", "share"), c(1, -1))
  # Position of the participant within its company
  get(obj)[, rank := seq_len(.N), by = "company_number"]
  # Position of the participant among participants of the same graph type
  get(obj)[, rankbygraphtype := seq_len(.N), by = c("company_number", "type")]
}
# Debug: holders_transitive_alpha0.5[company_number == "SL033942"]

# Collect the ranks produced by every algorithm in one table
algorithms <- c(
  "dpi",
  "npi",
  paste0("transitive_alpha", alphas),
  paste0("transitivedpi_alpha", alphas)
)

# Baseline algorithm: rank the direct participants of the evaluation companies
# by their equity share
holders_algorithm_ranks <- psc[
  company_number %in% affiliated_entities$company_number & !is.na(equity_share),
  .(company_number, participant_id, equity_share)
]
setorderv(holders_algorithm_ranks, c("company_number", "equity_share"), c(1, -1))
holders_algorithm_ranks[, rank_baseline := seq_len(.N), by = "company_number"]
holders_algorithm_ranks[, equity_share := NULL]

# Remaining algorithms: take the ranks from the matching holders_<alg> table
for (alg in algorithms) {
  alg_holders <- get(paste0("holders_", alg))
  cols_to_keep <- intersect(
    names(alg_holders),
    c("company_number", "participant_id", "rank", "rankbygraphtype")
  )
  # Full outer join, so pairs known to only one side are kept as well
  holders_algorithm_ranks <- merge(
    holders_algorithm_ranks,
    alg_holders[, cols_to_keep, with = FALSE],
    by = c("company_number", "participant_id"),
    all = TRUE,
    sort = FALSE
  )
  # Suffix the freshly merged rank columns with the algorithm name
  setnames(holders_algorithm_ranks, "rank", paste0("rank_", alg))
  setnames(
    holders_algorithm_ranks,
    "rankbygraphtype",
    paste0("rankbygraphtype_", alg),
    skip_absent = TRUE
  )
  # Progress indicator
  message(alg)
}

# Graph type of each participant (NA when the participant is not listed
# in graph_membership)
holders_algorithm_ranks[
  , type := graph_membership$type[match(participant_id, graph_membership$participant_id)]
]
# Which rank does the true parent from the evaluation data take
# under each algorithm?
holders_algorithm_ranks_affiliated <- holders_algorithm_ranks[
  company_number %in% affiliated_entities$company_number
]

# Attach the ground-truth parent of each company. Both tables carry a
# participant_id column, so the merge yields participant_id.x (ranked
# participant) and participant_id.y (true parent).
holders_algorithm_ranks_affiliated <- merge(
  holders_algorithm_ranks_affiliated,
  affiliated_entities[, -"path_length"],
  by = "company_number",
  all = TRUE,
  sort = FALSE
)
# NB: We have duplicates:
# holders_algorithm_ranks_affiliated[ company_number == "00906355"]
# Debug:
#holders_algorithm_ranks[ company_number == "00027883"]
#holders_algorithm_ranks_affiliated[ company_number == "00027883"]

# Attach the parent-to-subsidiary path length of the ground-truth pair
holders_algorithm_ranks_affiliated <- merge(
  holders_algorithm_ranks_affiliated,
  affiliated_entities,
  by.x = c("company_number", "participant_id.y"),
  by.y = c("company_number", "participant_id"),
  all.x = TRUE,
  all.y = FALSE,
  sort = FALSE
)

# From here on the baseline is evaluated together with the other algorithms
algorithms <- unique(c("baseline", algorithms))

for (alg in algorithms) {
  alg_rank_col <- paste0("rank_", alg)
  true_rank_col <- paste0("rank_true_", alg)

  # Rank of the true parent: filled only on rows where the ranked participant
  # is the true parent, NA elsewhere
  holders_algorithm_ranks_affiliated[, (true_rank_col) := NA_real_]
  holders_algorithm_ranks_affiliated[
    participant_id.x == participant_id.y,
    (true_rank_col) := get(alg_rank_col)
  ]

  # Variant that ranks super-holders only among themselves
  # (the baseline has no within-type rank)
  if (!grepl("baseline", alg)) {
    sh_rank_col <- paste0("rankbygraphtype_", alg)
    sh_true_rank_col <- paste0("rankonlysuperholder_true_", alg)

    holders_algorithm_ranks_affiliated[
      participant_id.x == participant_id.y & type == "SH",
      (sh_true_rank_col) := get(sh_rank_col)
    ]
    # Where no super-holder rank was set, fall back to the ordinary true rank
    holders_algorithm_ranks_affiliated[
      is.na(get(sh_true_rank_col)),
      (sh_true_rank_col) := get(true_rank_col)
    ]
  }
}
# Collapse to one row per company: the best (smallest) true-parent rank under
# every algorithm, plus the shortest path length
cols_of_interest <- c(
  grep("rank.*_true_", names(holders_algorithm_ranks_affiliated), value = TRUE),
  "path_length"
)
affiliated_evaluation_results <- holders_algorithm_ranks_affiliated[
  ,
  lapply(.SD, min, na.rm = TRUE),
  by = "company_number",
  .SDcols = cols_of_interest
]
# A non-finite minimum (e.g. the Inf that min() returns when a company has
# only NA values) is turned back into NA
affiliated_evaluation_results[
  ,
  (cols_of_interest) := lapply(.SD, function(x) ifelse(is.finite(x), x, NA)),
  .SDcols = cols_of_interest
]
# Taking the minimum per company also resolves the duplicated rows noted above
# affiliated_evaluation_results[ company_number == "00906355"]
# Compute recall @ k for every algorithm, overall and by path length.
#
# Recall @ k is the share of evaluation companies whose true parent is ranked
# within the first k participants. For every algorithm except the baseline a
# second variant (superholderpriority = 1) uses the rank that places
# super-holders only among themselves.
#
# Rows are collected in lists and bound once at the end instead of growing
# the result tables with rbind() on every iteration; the resulting tables
# have the same rows, columns and order.

## Path lengths present in the data, ascending, without NA
path_lengths <- sort(unique(holders_algorithm_ranks_affiliated$path_length))

## Loop-invariant quantities
n_companies <- nrow(affiliated_evaluation_results)
company_path_length <- affiliated_evaluation_results$path_length

## Row collectors for the overall table and the by-path-length table
recall_rows <- list()
recall_by_path_rows <- list()

for (k in 1:10) {
  for (alg in algorithms) {
    has_sh_variant <- !grepl("baseline", alg)

    # TRUE where the true parent is within the top k; NA where it has no rank
    # (NA is never counted as a hit)
    hit <- affiliated_evaluation_results[[paste0("rank_true_", alg)]] <= k
    recall_rows[[length(recall_rows) + 1L]] <- data.table(
      algorithm = alg,
      k = k,
      superholderpriority = 0,
      recall = sum(hit, na.rm = TRUE) / n_companies
    )

    # Same with the super-holder-only rank
    if (has_sh_variant) {
      hit_sh <- affiliated_evaluation_results[[paste0("rankonlysuperholder_true_", alg)]] <= k
      recall_rows[[length(recall_rows) + 1L]] <- data.table(
        algorithm = alg,
        k = k,
        superholderpriority = 1,
        recall = sum(hit_sh, na.rm = TRUE) / n_companies
      )
    }

    # Same for each path length
    for (p in path_lengths) {
      at_path <- company_path_length == p
      # Denominator: companies at this path length plus companies with an
      # unknown (NA) path length, as in the original computation
      n_at_path <- sum(at_path | is.na(company_path_length))

      recall_by_path_rows[[length(recall_by_path_rows) + 1L]] <- data.table(
        algorithm = alg,
        k = k,
        superholderpriority = 0,
        path = p,
        recall = sum(hit & at_path, na.rm = TRUE) / n_at_path
      )
      if (has_sh_variant) {
        recall_by_path_rows[[length(recall_by_path_rows) + 1L]] <- data.table(
          algorithm = alg,
          k = k,
          superholderpriority = 1,
          path = p,
          recall = sum(hit_sh & at_path, na.rm = TRUE) / n_at_path
        )
      }
    }
  }
}

## Object storing the recall
algorithm_recall <- rbindlist(recall_rows, fill = TRUE)
## Object storing the recall at different path lengths
algorithm_recall_by_pathlength <- rbindlist(recall_by_path_rows, fill = TRUE)

# Save point
fwrite(algorithm_recall, file = "output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall.csv")
fwrite(algorithm_recall_by_pathlength, file = "output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall_by_pathlength.csv")
# Manual inspection for tables
#algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 ]
#algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 3 ]
#algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 5 ]
#algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 10 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 1 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 2 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 3 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 4 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 5 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 10 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 12 ]
############
# Plot the results
library(data.table)
library(ggplot2)
library(ggthemes)
library(ggrepel)
library(stringi)
library(showtext)
# Work from the project root stored in the ALPHAICON_PATH environment variable
setwd(Sys.getenv("ALPHAICON_PATH"))

# Register the Open Sans font and let showtext render text in the plots
font_add_google("Open Sans", "Open Sans")
showtext_auto()

# Read the evaluation results back from the save point
algorithm_recall <- fread(
  "output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall.csv",
  encoding = "UTF-8"
)
algorithm_recall_by_pathlength <- fread(
  "output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall_by_pathlength.csv",
  encoding = "UTF-8"
)
# Derive display labels and helper columns for both recall tables.
# Each table is modified in place through get(obj).
for (obj in c("algorithm_recall", "algorithm_recall_by_pathlength")) {
  # 1 when the algorithm name mentions DPI (any case), 0 otherwise
  get(obj)[, dpi_weights := as.numeric(grepl("dpi", algorithm, ignore.case = TRUE))]
  # Shorter names: "transitive_alpha<a>" and "transitivedpi_alpha<a>" both
  # become "α=<a>" (the pattern covers the DPI variant as well)
  get(obj)[, algorithm := gsub("transitive.*_alpha", "α=", algorithm)]
  # Numeric alpha taken from the text after "α="; NA for names without it
  get(obj)[, alpha := as.numeric(stri_split_fixed(algorithm, "α=", n = 2, simplify = TRUE)[, 2])]
  # Mark the super-holder variant
  get(obj)[superholderpriority == 1, algorithm := paste0(algorithm, " SH")]
  # Mark α algorithms computed on DPI weights
  get(obj)[dpi_weights == 1 & grepl("α", algorithm), algorithm := paste0(algorithm, " DPI")]
  # Upper-case the index names
  get(obj)[, algorithm := gsub("npi", "NPI", gsub("dpi", "DPI", algorithm))]
}
########
# Produce the evaluation plots (overall)

# Subset for display: DPI and NPI against α=0.999 with and without
# super-holder priority
algorithm_recall_otheralgs <- algorithm_recall[
  algorithm %in% c("DPI", "NPI", "α=0.999", "α=0.999 SH")
]

# Each line is labelled by hand to the left of its k = 1 point
recall_at_first_k <- algorithm_recall_otheralgs[k == 1]
line_label_aes <- aes(label = algorithm, colour = algorithm, x = k - 1, y = recall)

recall_plot <- ggplot(
  data = algorithm_recall_otheralgs,
  mapping = aes(x = k, y = recall, group = algorithm, color = algorithm)
) +
  geom_line(size = 1.5, alpha = 1) +
  geom_point(size = 2) +
  scale_x_continuous(breaks = 1:10) +
  geom_text(line_label_aes, data = recall_at_first_k[algorithm == "DPI"], size = 8, hjust = 0.3) +
  geom_text(line_label_aes, data = recall_at_first_k[algorithm == "α=0.999"], size = 8, hjust = 0.7) +
  geom_text(line_label_aes, data = recall_at_first_k[algorithm == "NPI"], size = 8, hjust = 0.3) +
  geom_text(line_label_aes, data = recall_at_first_k[algorithm == "α=0.999 SH"], size = 8, hjust = 0.5, vjust = -0.6) +
  #geom_text_repel(aes(label = algorithm, colour = algorithm, x = k-1, y = recall), data = algorithm_recall_otheralgs[k == 1], size = 8, hjust = 0.5) +
  labs(y = expression("Recall @ "~italic("k")), x = expression(italic("k"))) +
  #scale_colour_brewer(type = "qual", palette = 2) +
  scale_color_manual(name = "", values = c("#b2df8a", "#33a02c", "#a6cee3", "#1f78b4")) +
  # Extra room on the left for the line labels
  expand_limits(x = c(-1.5, 10)) +
  theme_minimal() +
  theme(
    legend.position = "none",
    text = element_text(size = 18, family = "Open Sans"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

ggsave(
  filename = "output/alphaicon_paper/recall_plot.pdf",
  plot = recall_plot,
  device = cairo_pdf,
  width = 12,
  height = 9,
  scale = 0.6
)
# Another subset for display: only α-ICON with super-holder priority on equity
# shares, under different (positive) alphas
algorithm_recall_onlyalphas <- algorithm_recall[
  alpha > 0 & superholderpriority == 1 & dpi_weights == 0
]

# Alphas with the same recall at k = 1 are put into one group
alpha_group <- algorithm_recall_onlyalphas[k == 1, list(alphagroup = .GRP), by = "recall"]
alpha_group <- merge(
  alpha_group,
  algorithm_recall_onlyalphas[k == 1, c("alpha", "recall")],
  by = "recall"
)
alpha_group[, recall := NULL]

# Label of each group: the single alpha, or the "min-max" range of its alphas.
# All values are rounded to 3 decimals so that α=0.999 is never shown as "1"
# (rounding a lone 0.999 to 1 decimal would print "α=1"); labels of the
# one-decimal alphas are unaffected.
alpha_group[, minalpha := min(alpha), by = "alphagroup"]
alpha_group[, maxalpha := max(alpha), by = "alphagroup"]
alpha_group[minalpha == maxalpha, alphagrouplabel := as.character(round(alpha, 3))]
alpha_group[minalpha != maxalpha, alphagrouplabel := paste0(round(minalpha, 3), "-", round(maxalpha, 3))]
alpha_group[, alphagrouplabel := paste0("α=", alphagrouplabel)]
algorithm_recall_onlyalphas[
  , alphagroup := alpha_group$alphagrouplabel[match(alpha, alpha_group$alpha)]
]

# Keep one line per alpha group (first row per group and k). After this step
# the rows are unique, so no further de-duplication is needed for plotting.
algorithm_recall_onlyalphas <- unique(algorithm_recall_onlyalphas, by = c("alphagroup", "k"))

## α-ICON recall under different alphas
# Each group is labelled by hand next to a chosen point of its line
group_label_aes <- aes(
  label = alphagroup,
  colour = as.factor(alpha),
  group = alphagroup,
  x = k - 2.5,
  y = recall
)

recall_plot_alphas <- ggplot(
  data = algorithm_recall_onlyalphas,
  mapping = aes(x = k, y = recall, group = alphagroup, color = as.factor(alpha))
) +
  geom_line(size = 1.5, alpha = 1) +
  geom_point(size = 2) +
  scale_x_continuous(breaks = 1:10) +
  geom_text(group_label_aes, size = 8, data = algorithm_recall_onlyalphas[k == 4 & alpha == 0.1], hjust = 0.3) +
  geom_text(group_label_aes, size = 8, data = algorithm_recall_onlyalphas[k == 2 & alpha == 0.5], vjust = -4, hjust = 0.1) +
  geom_text(group_label_aes, size = 8, data = algorithm_recall_onlyalphas[k == 3 & alpha == 0.7], hjust = -0.1) +
  geom_text(group_label_aes, size = 8, data = algorithm_recall_onlyalphas[k == 1 & alpha == 0.8], vjust = 1, hjust = -0.1) +
  geom_text(group_label_aes, size = 8, data = algorithm_recall_onlyalphas[k == 2 & alpha == 0.9], hjust = 0.4) +
  #geom_text_repel(aes(label = alphagroup, colour = as.factor(alpha), group = alphagroup, x = k-2.5, y = recall), size = 8, data = algorithm_recall_onlyalphas[k == 1], hjust = 0, min.segment.length = 10) +
  scale_colour_brewer(type = "qual", palette = 2) +
  labs(y = expression("Recall @ "~italic("k")), x = expression(italic("k"))) +
  # Extra room on the left for the group labels
  expand_limits(x = c(-2, 10)) +
  theme_minimal() +
  theme(
    legend.position = "none",
    text = element_text(size = 18, family = "Open Sans"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

ggsave(
  filename = "output/alphaicon_paper/recall_plot_alphas.pdf",
  plot = recall_plot_alphas,
  device = cairo_pdf,
  width = 12,
  height = 9,
  scale = 0.6
)
# For reporting: recall of the selected algorithms at k = 1, 3, 5 and 10.
# The tables are left as top-level expressions so they print when the script
# is run interactively.
algorithms_to_report <- c(
  "baseline", "DPI", "NPI", "NPI SH",
  "α=0.0", "α=0.999", "α=0.999 SH", "α=0.999 SH DPI"
)
# Recall is rounded in place to 3 decimals
algorithm_recall[, recall := round(recall, 3)]
algorithm_recall[algorithm %in% algorithms_to_report & k == 1, .(algorithm, recall)]
algorithm_recall[algorithm %in% algorithms_to_report & k == 3, .(algorithm, recall)]
algorithm_recall[algorithm %in% algorithms_to_report & k == 5, .(algorithm, recall)]
algorithm_recall[algorithm %in% algorithms_to_report & k == 10, .(algorithm, recall)]
########
# Produce the evaluation plots (by path length)

# Subset for display: NPI against α=0.999 with and without
# super-holder priority
algorithm_recall_by_pathlength_otheralgs <- algorithm_recall_by_pathlength[
  algorithm %in% c("NPI", "α=0.999", "α=0.999 SH")
]

## Recall @ 1 as a function of the parent-to-subsidiary path length
recall_path_plot <- ggplot(
  data = algorithm_recall_by_pathlength_otheralgs[k == 1],
  mapping = aes(x = path, y = recall, group = algorithm, color = algorithm)
) +
  geom_line(size = 1.5, alpha = 1) +
  geom_point(size = 2) +
  scale_x_continuous(breaks = 1:12) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(
    y = "Recall @ 1 for parents within path",
    x = expression(italic("l")~", path length from subsidiary to parent")
  ) +
  #scale_colour_brewer(type = "qual", palette = 3) +
  scale_color_manual(name = "", values = c("#b2df8a", "#33a02c", "#1f78b4")) +
  expand_limits(x = c(1, 12)) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    text = element_text(size = 18, family = "Open Sans"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

ggsave(
  filename = "output/alphaicon_paper/recall_path_plot_at_1.pdf",
  plot = recall_path_plot,
  device = cairo_pdf,
  width = 12,
  height = 9,
  scale = 0.6
)
# For reporting: recall @ 1 of the selected algorithms at path lengths
# 1, 2, 3, 4, 5, 10 and 12. The tables are left as top-level expressions so
# they print when the script is run interactively.
algorithms_path_length_to_report <- c(
  "baseline", "DPI", "NPI", "NPI SH",
  "α=0.0", "α=0.999", "α=0.999 SH", "α=0.999 SH DPI"
)
# Recall is rounded in place to 3 decimals
algorithm_recall_by_pathlength[, recall := round(recall, 3)]
algorithm_recall_by_pathlength[k == 1 & path == 1 & algorithm %in% algorithms_path_length_to_report, .(algorithm, recall)]
algorithm_recall_by_pathlength[k == 1 & path == 2 & algorithm %in% algorithms_path_length_to_report, .(algorithm, recall)]
algorithm_recall_by_pathlength[k == 1 & path == 3 & algorithm %in% algorithms_path_length_to_report, .(algorithm, recall)]
algorithm_recall_by_pathlength[k == 1 & path == 4 & algorithm %in% algorithms_path_length_to_report, .(algorithm, recall)]
algorithm_recall_by_pathlength[k == 1 & path == 5 & algorithm %in% algorithms_path_length_to_report, .(algorithm, recall)]
algorithm_recall_by_pathlength[k == 1 & path == 10 & algorithm %in% algorithms_path_length_to_report, .(algorithm, recall)]
algorithm_recall_by_pathlength[k == 1 & path == 12 & algorithm %in% algorithms_path_length_to_report, .(algorithm, recall)]