# alphaicon/code/alphaicon_paper/5_algorithm_evaluation.r

# This code evaluates various algorithms
# for ultimate owner identification

library(data.table)
library(stringi)
library(stringr)
library(igraph)

# Declare working directory beforehand in an environment variable
# ALPHAICON_PATH = "path_to_your_folder"
# with the aid of usethis::edit_r_environ()
# Restart R session for the changes to take effect
setwd(Sys.getenv("ALPHAICON_PATH"))

############
# Load and prepare the data

# Active participants snapshot of PSC
# (prepared by data_preparation/uk/2_psc_snapshot_to_participants_panel.r)
psc <- fread(
	"output/uk/uk_organisations_participants_2021_long_2aug21.csv",
	integer64 = "character", na.strings = "", encoding = "UTF-8"
)

# Read one power-index file (computed by alphaicon_paper/2_compute_npi_dpi.r)
# and bring it to the common holder layout:
# entity -> company_number, participant -> participant_id, <index_column> -> share.
# The times_pivotal column is dropped as it is not used further in this script.
read_power_index <- function(path, index_column) {
	index_dt <- fread(path, integer64 = "character", na.strings = "", encoding = "UTF-8")
	setnames(
		index_dt,
		c("entity", "participant", index_column),
		c("company_number", "participant_id", "share")
	)
	index_dt[, times_pivotal := NULL]
	index_dt
}

# DPI of each participant after 10000 iterations
holders_dpi <- read_power_index(
	"output/uk/npi_dpi/10000iter/uk_organisations_participants_2021_long_7sep21_dpi_10000iter.csv",
	"dpi"
)

# NPI of each participant after 10000 iterations
holders_npi <- read_power_index(
	"output/uk/npi_dpi/10000iter/uk_organisations_participants_2021_long_7sep21_npi_10000iter.csv",
	"npi"
)

# Load the transitive ownership data at various alpha
# (prepared by alphaicon_paper/1_compute_alphaicon.ipynb).
# Alphas are kept as strings because they are pasted verbatim
# into file names and object names.
alphas <- c("0.0", "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "0.9", "0.999")

for (a in alphas) {

	# Common part of both file names for this alpha
	transitive_prefix <- paste0("output/uk/transitive/uk_organisations_transitive_ownership_alpha", a)

	# Transitive ownership on equity shares -> holders_transitive_alpha<a>
	assign(
		paste0("holders_transitive_alpha", a),
		fread(
			paste0(transitive_prefix, "_2021_long_2aug21.csv"),
			integer64 = "character", na.strings = "", encoding = "UTF-8",
			colClasses = c("character", "character", "numeric")
		)
	)

	# Transitive ownership on DPI weights -> holders_transitivedpi_alpha<a>
	assign(
		paste0("holders_transitivedpi_alpha", a),
		fread(
			paste0(transitive_prefix, "_2021_long_7sep21_dpi_10000iter.csv"),
			integer64 = "character", na.strings = "", encoding = "UTF-8",
			colClasses = c("character", "character", "numeric")
		)
	)

	# Do not leave the temporary behind
	rm(transitive_prefix)

}

# Graph membership of every node
# (prepared by alphaicon_paper/1_compute_alphaicon.ipynb).
# The file has three columns: an unnamed one that fread calls V1,
# the node identifier ("company_number/id") and a third column read as a factor.
graph_membership <- fread(
	"output/uk/uk_organisations_participation_graph_core_periphery_membership_6aug21.csv",
	integer64 = "character", na.strings = "", encoding = "UTF-8",
	header = TRUE, colClasses = c("numeric", "character", "factor")
)
# Drop the first column and rename the identifier so that it can be
# joined with the holder tables on participant_id
graph_membership[, "V1" := NULL]
setnames(graph_membership, old = "company_number/id", new = "participant_id")

# Evaluation (gold standard) data on parents and subsidiaries
# (prepared by data_preparation/uk/3_prepare_affiliated_entities_evaluation_data.r)
affiliated_entities <- fread(
	"data/uk/uk_parent_subsidiary_mapping_2020_2021_sec_filers_exhibit21.csv",
	integer64 = "character", na.strings = "", encoding = "UTF-8"
)
# The CorpWatch entity identifiers are not needed
affiliated_entities[, `:=`(subsidiary_cw_id = NULL, parent_cw_id = NULL)]

# Add the data on the shortest paths from parent to subsidiary

# Directed PSC graph with edges participant -> company, built from
# records with a known equity share and without self-loops
psc_edges <- psc[
	participant_id != company_number & !is.na(equity_share),
	.(participant_id, company_number, equity_share)
]
psc_graph <- graph_from_data_frame(psc_edges, directed = TRUE)

# Vertices of the gold standard parents and subsidiaries that are
# present on the graph
vertex_names <- V(psc_graph)$name
parent_vertices <- which(vertex_names %in% unique(affiliated_entities$participant_id))
subsidiary_vertices <- which(vertex_names %in% affiliated_entities$company_number)

# Compute all the distances on the graph from parent companies
# in the gold standard data to the subsidiaries (following outgoing edges).
# NOTE(review): the edge attribute is called equity_share rather than weight,
# so the distances are presumably hop counts — confirm against igraph docs.
affiliated_distance_mat <- distances(
	psc_graph,
	v = parent_vertices,
	to = subsidiary_vertices,
	mode = "out"
)

# Reshape the parent x subsidiary matrix to long form:
# one row per (parent, subsidiary) pair with its path length
affiliated_distance <- as.data.table(as.table(affiliated_distance_mat))
setnames(affiliated_distance, c("participant_id", "company_number", "path_length"))
# Remove non-reachable entries
affiliated_distance <- affiliated_distance[is.finite(path_length)]

# Add the path lengths to the affiliated entities data
affiliated_entities <- merge(
	affiliated_entities, affiliated_distance,
	by = c("company_number", "participant_id"),
	all.x = TRUE, all.y = FALSE, sort = FALSE
)
#affiliated_entities[, .N, by = "path_length"]
#nrow(affiliated_entities[!is.na(path_length)]) # 1007

# Keep only affiliated entities reachable on the PSC graph
affiliated_entities <- affiliated_entities[!is.na(path_length)]

# Names of all holder objects that are loaded in the memory
holder_objects <- paste0(
	"holders_",
	c("dpi", "npi", paste0("transitive_alpha", alphas), paste0("transitivedpi_alpha", alphas))
)

############
# Subset holder objects and rank the participants

# Each holder object is processed independently, in one pass:
# subset -> add graph type -> sort -> rank
for (obj in holder_objects) {

	holders <- get(obj)

	# Keep only the rows where either side is a company or a parent from
	# the evaluation data (to speed up the computation time)
	holders <- holders[
		company_number %in% affiliated_entities$company_number |
			company_number %in% affiliated_entities$participant_id |
			participant_id %in% affiliated_entities$company_number |
			participant_id %in% affiliated_entities$participant_id
	]

	# Add the entity type on the graph for every participant
	holders <- merge(
		holders, graph_membership,
		by = "participant_id", all.x = TRUE, all.y = FALSE, sort = FALSE
	)

	# Sort by descending share within companies
	setorderv(holders, c("company_number", "share"), c(1, -1))

	# Rank among all participants of a company
	holders[, rank := seq_len(.N), by = "company_number"]

	# Another rank: among participants of the same graph type within a company
	holders[, rankbygraphtype := seq_len(.N), by = c("company_number", "type")]

	# Write the result back under the original name
	assign(obj, holders)

}

rm(holders)

# Debug: holders_transitive_alpha0.5[company_number == "SL033942"]

# Create a single object with ranks under different algorithms
algorithms <- c("dpi", "npi", paste0("transitive_alpha", alphas), paste0("transitivedpi_alpha", alphas))

# Baseline algorithm: the actual participants of the evaluated companies
# with a known equity share, ranked by that share within each company
holders_algorithm_ranks <- psc[
	company_number %in% affiliated_entities$company_number & !is.na(equity_share),
	.(company_number, participant_id, equity_share)
]
setorderv(holders_algorithm_ranks, c("company_number", "equity_share"), c(1, -1))
holders_algorithm_ranks[, rank_baseline := seq_len(.N), by = "company_number"]
holders_algorithm_ranks[, equity_share := NULL]

# For the remaining algorithms take the ranks from the holder objects.
# The join is a full outer one: pairs known only to the algorithm are
# appended, pairs known only to the baseline get NA ranks.
rank_columns <- c("company_number", "participant_id", "rank", "rankbygraphtype")

for (alg in algorithms) {

	alg_holders <- get(paste0("holders_", alg))
	alg_ranks <- alg_holders[, intersect(names(alg_holders), rank_columns), with = FALSE]

	holders_algorithm_ranks <- merge(
		holders_algorithm_ranks, alg_ranks,
		by = c("company_number", "participant_id"),
		all = TRUE, sort = FALSE
	)

	# Suffix the freshly joined columns with the algorithm name
	setnames(holders_algorithm_ranks, "rank", paste0("rank_", alg))
	setnames(holders_algorithm_ranks, "rankbygraphtype", paste0("rankbygraphtype_", alg), skip_absent = TRUE)

	# Progress indicator
	message(alg)

}

# Add the participant type in graph (NA when the participant is not in graph_membership)
holders_algorithm_ranks[, type := graph_membership$type[match(participant_id, graph_membership$participant_id)]]

# What rank do the actual parents from the affiliated entities
# evaluation data take under each algorithm
holders_algorithm_ranks_affiliated <- holders_algorithm_ranks[company_number %in% affiliated_entities$company_number]

# Add the ground truth participant id. Both tables have a participant_id
# column, so after the join participant_id.x is the candidate holder and
# participant_id.y is the ground truth parent.
holders_algorithm_ranks_affiliated <- merge(
	holders_algorithm_ranks_affiliated,
	affiliated_entities[, -"path_length"],
	by = "company_number", all = TRUE, sort = FALSE
)

# NB: We have duplicates:
# holders_algorithm_ranks_affiliated[ company_number == "00906355"]

# Debug:
#holders_algorithm_ranks[ company_number == "00027883"]
#holders_algorithm_ranks_affiliated[ company_number == "00027883"]

# Add the path lengths (parent -> subsidiary) to the data
holders_algorithm_ranks_affiliated <- merge(
	holders_algorithm_ranks_affiliated,
	affiliated_entities,
	by.x = c("company_number", "participant_id.y"),
	by.y = c("company_number", "participant_id"),
	all.x = TRUE, all.y = FALSE, sort = FALSE
)

# What rank does the truth take under each algorithm
algorithms <- unique(c("baseline", algorithms))

for (alg in algorithms) {

	rank_col <- paste0("rank_", alg)
	true_rank_col <- paste0("rank_true_", alg)

	# Rank of the true parent: filled only in the rows where the candidate
	# holder is the ground truth parent, NA elsewhere
	holders_algorithm_ranks_affiliated[, (true_rank_col) := NA_real_]
	holders_algorithm_ranks_affiliated[participant_id.x == participant_id.y, (true_rank_col) := get(rank_col)]

	# The baseline has no rank within graph types
	if (grepl("baseline", alg)) {
		next
	}

	sh_rank_col <- paste0("rankbygraphtype_", alg)
	true_sh_rank_col <- paste0("rankonlysuperholder_true_", alg)

	# Rank of the true parent when we consider only super-holders
	holders_algorithm_ranks_affiliated[
		participant_id.x == participant_id.y & type == "SH",
		(true_sh_rank_col) := get(sh_rank_col)
	]

	# Fall back to the overall rank of the truth where there is
	# no super-holder rank
	holders_algorithm_ranks_affiliated[
		is.na(get(true_sh_rank_col)),
		(true_sh_rank_col) := get(true_rank_col)
	]

}

# Aggregate at company level: columns with the ranks of the truth
# (rank_true_* and rankonlysuperholder_true_*) plus the path length
cols_of_interest <- c(
	grep("rank.*_true_", names(holders_algorithm_ranks_affiliated), value = TRUE),
	"path_length"
)

# Best (minimal) value per company, ignoring missing values
affiliated_evaluation_results <- holders_algorithm_ranks_affiliated[
	,
	lapply(.SD, min, na.rm = TRUE),
	by = "company_number",
	.SDcols = cols_of_interest
]

# A company with no non-missing value may end up with a non-finite minimum:
# turn every non-finite value into NA
non_finite_to_na <- function(x) ifelse(is.finite(x), x, NA)
affiliated_evaluation_results[, (cols_of_interest) := lapply(.SD, non_finite_to_na), .SDcols = cols_of_interest]

# With the above min-aggregation we resolve the issue with duplicates
# affiliated_evaluation_results[ company_number == "00906355"]

# Compute recall @ different k for algorithms.
#
# Output objects (same names, columns and row order as before):
#   algorithm_recall               — algorithm, k, superholderpriority, recall
#   algorithm_recall_by_pathlength — algorithm, k, superholderpriority, path, recall
# superholderpriority is 0 for the plain rank of the true parent and 1 for
# the rank that considers only super-holders (not available for the baseline).
#
# The per-(k, algorithm) results are collected in pre-allocated lists and
# bound once at the end instead of growing the tables with rbind() on every
# iteration; loop-invariant denominators are computed once up front.

# Path lengths present in the data, ascending, without NA
path_lengths <- sort(unique(holders_algorithm_ranks_affiliated$path_length))

# Denominator of the overall recall: all evaluated companies
n_companies <- nrow(affiliated_evaluation_results)

# Denominator of the recall at path length p: companies at that path length
# (companies with a missing path length are counted as well)
eval_path_length <- affiliated_evaluation_results$path_length
n_companies_by_path <- vapply(
	path_lengths,
	function(p) sum(eval_path_length == p | is.na(eval_path_length)),
	numeric(1)
)

recall_chunks <- vector("list", 10L * length(algorithms))
recall_by_path_chunks <- vector("list", 10L * length(algorithms))
chunk <- 0L

for (k in 1:10) {

	for (alg in algorithms) {

		chunk <- chunk + 1L

		# Rank columns to evaluate: the plain one and, except for the
		# baseline, the one considering only super-holders
		if (grepl("baseline", alg)) {
			sh_flags <- 0
			rank_cols <- paste0("rank_true_", alg)
		} else {
			sh_flags <- c(0, 1)
			rank_cols <- paste0(c("rank_true_", "rankonlysuperholder_true_"), alg)
		}

		# For each rank column: does the company have the true participant
		# among its top-k participants (a missing rank is not a hit)
		hits <- lapply(rank_cols, function(rank_col) {
			true_rank <- affiliated_evaluation_results[[rank_col]]
			!is.na(true_rank) & true_rank <= k
		})

		# How many organisations have the true participant out of all k participants
		recall_chunks[[chunk]] <- data.table(
			algorithm = alg,
			k = k,
			superholderpriority = sh_flags,
			recall = vapply(hits, sum, numeric(1)) / n_companies
		)

		# Same for each path length
		path_chunks <- lapply(seq_along(path_lengths), function(j) {
			on_path <- eval_path_length == path_lengths[j]
			data.table(
				algorithm = alg,
				k = k,
				superholderpriority = sh_flags,
				path = path_lengths[j],
				recall = vapply(hits, function(hit) sum(hit & on_path, na.rm = TRUE), numeric(1)) / n_companies_by_path[j]
			)
		})
		recall_by_path_chunks[[chunk]] <- rbindlist(path_chunks)

	}

}

algorithm_recall <- rbindlist(recall_chunks, use.names = TRUE, fill = TRUE)
algorithm_recall_by_pathlength <- rbindlist(recall_by_path_chunks, use.names = TRUE, fill = TRUE)

# Save point
fwrite(algorithm_recall, file = "output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall.csv")
fwrite(algorithm_recall_by_pathlength, file = "output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall_by_pathlength.csv")

# Manual inspection for tables
#algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 ]
#algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 3 ]
#algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 5 ]
#algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 10 ]

#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 1 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 2 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 3 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 4 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 5 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 10 ]
#algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 12 ]


############
# Plot the results

library(data.table)
library(ggplot2)
library(ggthemes)
library(ggrepel)
library(stringi)
library(showtext)

setwd(Sys.getenv("ALPHAICON_PATH"))

# Add the font to use
font_add_google("Open Sans", "Open Sans")
showtext_auto()

# Load the files with evaluation results
algorithm_recall <- fread("output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall.csv", encoding = "UTF-8")
algorithm_recall_by_pathlength <- fread("output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall_by_pathlength.csv", encoding = "UTF-8")

# Derive display names and helper columns in both result tables.
# The tables are modified by reference.
for (obj in c("algorithm_recall", "algorithm_recall_by_pathlength")) {

	results <- get(obj)

	# Column marking whether we use DPI weights (1) or not (0):
	# any algorithm with "dpi" in its name
	results[, dpi_weights := as.numeric(grepl("dpi", algorithm, ignore.case = TRUE))]

	# Shorter names. The first pattern already covers both the
	# "transitive_alpha" and the "transitivedpi_alpha" prefixes, so the
	# second substitution has nothing left to replace.
	results[, algorithm := gsub("transitive.*_alpha", "α=", algorithm)]
	results[, algorithm := gsub("transitivedpi_alpha", "α=", algorithm)]

	# Extract alpha values from the name (NA for names without "α=")
	results[, alpha := as.numeric(stri_split_fixed(algorithm, "α=", n = 2, simplify = TRUE)[, 2])]

	# Add the superholder mark to corresponding algorithm names
	results[superholderpriority == 1, algorithm := paste0(algorithm, " SH")]

	# Add the DPI weight mark to corresponding algorithm names
	results[dpi_weights == 1 & grepl("α", algorithm), algorithm := paste0(algorithm, " DPI")]

	# Certain names to upper case: first dpi, then npi
	results[, algorithm := gsub("npi", "NPI", gsub("dpi", "DPI", algorithm))]

}

rm(results)

########
# Produce the evaluation plots (overall)

# One subset for display: DPI and NPI versus α-ICON at α=0.999
# (with and without the super-holder priority)
algorithm_recall_otheralgs <- algorithm_recall[algorithm %in% c("DPI", "NPI", "α=0.999", "α=0.999 SH")]

# Text layer with the name of one algorithm, placed one unit to the left
# of its point at k = 1. Justification arguments are passed via `...`.
recall_label_layer <- function(alg_name, ...) {
	geom_text(
		aes(label = algorithm, colour = algorithm, x = k - 1, y = recall),
		data = algorithm_recall_otheralgs[k == 1 & algorithm == alg_name],
		size = 8,
		...
	)
}

## Recall @ k of the selected algorithms, lines labelled in place of a legend
recall_plot <- ggplot(
		data = algorithm_recall_otheralgs,
		mapping = aes(x = k, y = recall, group = algorithm, color = algorithm)
	) +
	geom_line(size = 1.5, alpha = 1) +
	geom_point(size = 2) +
	scale_x_continuous(breaks = 1:10) +
	recall_label_layer("DPI", hjust = 0.3) +
	recall_label_layer("α=0.999", hjust = 0.7) +
	recall_label_layer("NPI", hjust = 0.3) +
	recall_label_layer("α=0.999 SH", hjust = 0.5, vjust = -0.6) +
	#geom_text_repel(aes(label = algorithm, colour = algorithm, x = k-1, y = recall), data = algorithm_recall_otheralgs[k == 1], size = 8, hjust = 0.5) +
	labs(y = expression("Recall @ "~italic("k")), x = expression(italic("k"))) +
	#scale_colour_brewer(type = "qual", palette = 2) +
	scale_color_manual(name = "", values = c("#b2df8a", "#33a02c", "#a6cee3", "#1f78b4")) +
	# Extra room on the left for the in-plot labels
	expand_limits(x = c(-1.5, 10)) +
	theme_minimal() +
	theme(
		legend.position = "none",
		text = element_text(size = 18, family = "Open Sans"),
		panel.grid.major = element_blank(),
		panel.grid.minor = element_blank()
	)

ggsave(
	filename = "output/alphaicon_paper/recall_plot.pdf",
	plot = recall_plot,
	width = 12, height = 9, device = cairo_pdf, scale = 0.6
)

# Another subset for display: only α-ICON under different alphas
# (alpha > 0, super-holder priority, no DPI weights)
algorithm_recall_onlyalphas <- algorithm_recall[alpha > 0 & superholderpriority == 1 & dpi_weights == 0]

# Group the alphas that have the same value of recall at k = 1
alpha_group <- algorithm_recall_onlyalphas[k == 1, list(alphagroup = .GRP), by = "recall"]
alpha_group <- merge(alpha_group, algorithm_recall_onlyalphas[k == 1, c("alpha", "recall")], by = "recall")
alpha_group[, recall := NULL]

# Generate the label text for alpha group: a single value for a group of one
# alpha, a "min-max" range otherwise
alpha_group[, c("minalpha", "maxalpha") := list(min(alpha), max(alpha)), by = "alphagroup"]

# Three digits are kept on every bound so that 0.999 is never displayed
# as 1, also when it forms a group of its own; 0.1, ..., 0.9 print as before
alpha_group[minalpha == maxalpha, alphagrouplabel := as.character(round(alpha, 3))]
alpha_group[minalpha != maxalpha, alphagrouplabel := paste0(round(minalpha, 3), "-", round(maxalpha, 3))]
alpha_group[, alphagrouplabel := paste0("α=", alphagrouplabel)]

algorithm_recall_onlyalphas[, alphagroup := alpha_group$alphagrouplabel[match(alpha, alpha_group$alpha)]]

# Make alpha groups unique: one row per group and k
algorithm_recall_onlyalphas <- unique(algorithm_recall_onlyalphas, by = c("alphagroup", "k"))

# Text layer with the label of the alpha group, taken from the row with
# the given k and alpha and shifted 2.5 units to the left.
# Justification arguments are passed via `...`.
alpha_label_layer <- function(at_k, at_alpha, ...) {
	geom_text(
		aes(label = alphagroup, colour = as.factor(alpha), group = alphagroup, x = k - 2.5, y = recall),
		size = 8,
		data = algorithm_recall_onlyalphas[k == at_k & alpha == at_alpha],
		...
	)
}

## Recall @ k of α-ICON under different alphas, one line per alpha group
recall_plot_alphas <- ggplot(
		data = unique(algorithm_recall_onlyalphas),
		mapping = aes(x = k, y = recall, group = alphagroup, color = as.factor(alpha))
	) +
	geom_line(size = 1.5, alpha = 1) +
	geom_point(size = 2) +
	scale_x_continuous(breaks = 1:10) +
	alpha_label_layer(4, 0.1, hjust = 0.3) +
	alpha_label_layer(2, 0.5, vjust = -4, hjust = 0.1) +
	alpha_label_layer(3, 0.7, hjust = -0.1) +
	alpha_label_layer(1, 0.8, vjust = 1, hjust = -0.1) +
	alpha_label_layer(2, 0.9, hjust = 0.4) +
	#geom_text_repel(aes(label = alphagroup, colour = as.factor(alpha), group = alphagroup, x = k-2.5, y = recall), size = 8, data = algorithm_recall_onlyalphas[k == 1], hjust = 0, min.segment.length = 10) +
	scale_colour_brewer(type = "qual", palette = 2) +
	labs(y = expression("Recall @ "~italic("k")), x = expression(italic("k"))) +
	# Extra room on the left for the in-plot labels
	expand_limits(x = c(-2, 10)) +
	theme_minimal() +
	theme(
		legend.position = "none",
		text = element_text(size = 18, family = "Open Sans"),
		panel.grid.major = element_blank(),
		panel.grid.minor = element_blank()
	)

ggsave(
	filename = "output/alphaicon_paper/recall_plot_alphas.pdf",
	plot = recall_plot_alphas,
	width = 12, height = 9, device = cairo_pdf, scale = 0.6
)

# For reporting: table with recall (rounded to 3 digits, in place)
# of the selected algorithms at k = 1, 3, 5 and 10
algorithm_recall[, recall := round(recall, 3)]

algorithms_to_report <- c("baseline", "DPI", "NPI", "NPI SH", "α=0.0", "α=0.999", "α=0.999 SH", "α=0.999 SH DPI")
recall_to_report <- algorithm_recall[algorithm %in% algorithms_to_report]

recall_to_report[k == 1, .(algorithm, recall)]
recall_to_report[k == 3, .(algorithm, recall)]
recall_to_report[k == 5, .(algorithm, recall)]
recall_to_report[k == 10, .(algorithm, recall)]


########
# Produce the evaluation plots (by path length)

# One subset for display: NPI versus α-ICON at α=0.999
# (with and without the super-holder priority)
algorithm_recall_by_pathlength_otheralgs <- algorithm_recall_by_pathlength[algorithm %in% c("NPI", "α=0.999", "α=0.999 SH")]

## Recall @ 1 of the selected algorithms against the path length
recall_path_plot <- ggplot(
		data = algorithm_recall_by_pathlength_otheralgs[k == 1],
		mapping = aes(x = path, y = recall, group = algorithm, color = algorithm)
	) +
	geom_line(size = 1.5, alpha = 1) +
	geom_point(size = 2) +
	scale_x_continuous(breaks = 1:12) +
	scale_y_continuous(limits = c(0, 1)) +
	labs(
		y = "Recall @ 1 for parents within path",
		x = expression(italic("l")~", path length from subsidiary to parent")
	) +
	#scale_colour_brewer(type = "qual", palette = 3) +
	scale_color_manual(name = "", values = c("#b2df8a", "#33a02c", "#1f78b4")) +
	expand_limits(x = c(1, 12)) +
	theme_minimal() +
	theme(
		legend.position = "bottom",
		text = element_text(size = 18, family = "Open Sans"),
		panel.grid.major = element_blank(),
		panel.grid.minor = element_blank()
	)

ggsave(
	filename = "output/alphaicon_paper/recall_path_plot_at_1.pdf",
	plot = recall_path_plot,
	width = 12, height = 9, device = cairo_pdf, scale = 0.6
)

# For reporting: table with recall @ 1 (rounded to 3 digits, in place)
# of the selected algorithms at path lengths 1-5, 10 and 12
algorithms_path_length_to_report <- c("baseline", "DPI", "NPI", "NPI SH", "α=0.0", "α=0.999", "α=0.999 SH", "α=0.999 SH DPI")

algorithm_recall_by_pathlength[, recall := round(recall, 3)]

recall_at_1_by_path_to_report <- algorithm_recall_by_pathlength[algorithm %in% algorithms_path_length_to_report & k == 1]

recall_at_1_by_path_to_report[path == 1, .(algorithm, recall)]
recall_at_1_by_path_to_report[path == 2, .(algorithm, recall)]
recall_at_1_by_path_to_report[path == 3, .(algorithm, recall)]
recall_at_1_by_path_to_report[path == 4, .(algorithm, recall)]
recall_at_1_by_path_to_report[path == 5, .(algorithm, recall)]
recall_at_1_by_path_to_report[path == 10, .(algorithm, recall)]
recall_at_1_by_path_to_report[path == 12, .(algorithm, recall)]