# This code evaluates various algorithms # for ultimate owner identification library(data.table) library(stringi) library(stringr) library(igraph) # Declare working directory beforehand in an environment variable # ALPHAICON_PATH = "path_to_your_folder" # with the aid of usethis::edit_r_environ() # Restart R session for the changes to make effect setwd(Sys.getenv('ALPHAICON_PATH')) ############ # Load and prepare the data # Load the active participants snapshot of PSC (prepared by data_preparation/uk/2_psc_snapshot_to_participants_panel.r) psc <- fread("output/uk/uk_organisations_participants_2021_long_2aug21.csv", integer64 = "character", na.strings = "", encoding = "UTF-8") # Load the DPI of each participant after 10000 iterations (computed by alphaicon_paper/2_compute_npi_dpi.r) holders_dpi <- fread("output/uk/npi_dpi/10000iter/uk_organisations_participants_2021_long_7sep21_dpi_10000iter.csv", integer64 = "character", na.strings = "", encoding = "UTF-8") setnames(holders_dpi, c("entity", "participant", "dpi"), c("company_number", "participant_id", "share")) holders_dpi[, times_pivotal := NULL ] # Load the NPI of each participant after 10000 iterations (computed by alphaicon_paper/2_compute_npi_dpi.r) holders_npi <- fread("output/uk/npi_dpi/10000iter/uk_organisations_participants_2021_long_7sep21_npi_10000iter.csv", integer64 = "character", na.strings = "", encoding = "UTF-8") setnames(holders_npi, c("entity", "participant", "npi"), c("company_number", "participant_id", "share")) holders_npi[, times_pivotal := NULL ] # Load the transitive ownership data at various alpha # (prepared by alphaicon_paper/1_compute_alphaicon.ipynb) alphas <- c("0.0", "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "0.9", "0.999") for( a in alphas) { # Transitive ownership on equity shares temp1 <- fread(paste0("output/uk/transitive/uk_organisations_transitive_ownership_alpha", a, "_2021_long_2aug21.csv"), integer64 = "character", na.strings = "", encoding = "UTF-8", colClasses = c("character", "character", "numeric")) assign(paste0("holders_transitive_alpha", a), temp1) # Transitive ownership on DPI weights temp2 <- fread(paste0("output/uk/transitive/uk_organisations_transitive_ownership_alpha", a, "_2021_long_7sep21_dpi_10000iter.csv"), integer64 = "character", na.strings = "", encoding = "UTF-8", colClasses = c("character", "character", "numeric")) assign(paste0("holders_transitivedpi_alpha", a), temp2) rm(temp1); rm(temp2) } # Load the data on graph membership # (prepared by alphaicon_paper/1_compute_alphaicon.ipynb) graph_membership <- fread("output/uk/uk_organisations_participation_graph_core_periphery_membership_6aug21.csv", integer64 = "character", na.strings = "", encoding = "UTF-8", header = T, colClasses = c("numeric", "character", "factor")) graph_membership[, V1 := NULL ] setnames(graph_membership, "company_number/id", "participant_id") # Load the data for evaluation of the algorithms (prepared by data_preparation/uk/3_prepare_affiliated_entities_evaluation_data.r) affiliated_entities <- fread("data/uk/uk_parent_subsidiary_mapping_2020_2021_sec_filers_exhibit21.csv", integer64 = "character", na.strings = "", encoding = "UTF-8") # Remove the CorpWatch entity identifiers affiliated_entities[, c("subsidiary_cw_id", "parent_cw_id") := NULL ] # Add the data on the shortest paths from parent to subsidiary psc_graph <- graph_from_data_frame(psc[participant_id != company_number & !is.na(equity_share), c("participant_id", "company_number", "equity_share"), with = F], directed = T) # Compute all the distances on the graph # from parent companies in the gold standard # data to the subsidiates affiliated_distance_mat <- distances(psc_graph, v = which(V(psc_graph)$name %in% unique(affiliated_entities$participant_id)), to = which(V(psc_graph)$name %in% affiliated_entities$company_number), mode = "out") # Arrange them so that the distance is on the main diagonal affiliated_distance <- as.data.table(as.table(affiliated_distance_mat)) names(affiliated_distance) <- c("participant_id", "company_number", "path_length") # Remove non-reachable entries affiliated_distance <- affiliated_distance[ is.finite(path_length) ] # Add the path lengths to the affiliated entities data affiliated_entities <- merge(affiliated_entities, affiliated_distance, by = c("company_number", "participant_id"), all.x = T, all.y = F, sort = F) #affiliated_entities[, .N, by = "path_length"] #nrow(affiliated_entities[!is.na(path_length)] # 1007 # Keep only affiliated entities reachable on the PSC graph affiliated_entities <- affiliated_entities[ !is.na(path_length) ] # List all holder objects that are loaded in the memory holder_objects <- paste0("holders_", c("dpi", "npi", paste0("transitive_alpha", alphas), paste0("transitivedpi_alpha", alphas))) ############ # Subset holder objects # Keep only the affiliated entities in the holder objects (to speed up the computation time) for(obj in holder_objects) { assign(obj, get(obj)[ company_number %in% affiliated_entities$company_number | company_number %in% affiliated_entities$participant_id | participant_id %in% affiliated_entities$company_number | participant_id %in% affiliated_entities$participant_id]) # Add the entity type for objects assign(obj, merge(get(obj), graph_membership, by = "participant_id", all.x = T, all.y = F, sort = F)) } # Add the ranks for(obj in holder_objects) { # Sort by descending share within companies setorderv(get(obj), c("company_number", "share"), c(1, -1)) # Add the rank get(obj)[, rank := 1:.N, by = "company_number"] # Devise another rank # that ranks participants within types get(obj)[, rankbygraphtype := 1:.N, by = c("company_number", "type")] } # Debug: holders_transitive_alpha0.5[company_number == "SL033942"] # Create a single object with ranks under different algorithms algorithms <- c("dpi", "npi", paste0("transitive_alpha", alphas), paste0("transitivedpi_alpha", alphas)) # Start with the actual participants data (for the baseline algorithm) holders_algorithm_ranks <- psc[ company_number %in% affiliated_entities$company_number, c("company_number", "participant_id", "equity_share"), with = F] holders_algorithm_ranks <- holders_algorithm_ranks[ !is.na(equity_share) ] # For the baseline algorithm simply take the equity share setorderv(holders_algorithm_ranks, c("company_number", "equity_share"), c(1, -1)) holders_algorithm_ranks[, rank_baseline := 1:.N, by = "company_number" ] holders_algorithm_ranks[, equity_share := NULL ] # For the remaining ranks query the holders object for(alg in algorithms) { cols_to_keep <- intersect(names(get(paste0("holders_", alg))), c("company_number", "participant_id", "rank", "rankbygraphtype")) holders_algorithm_ranks <- merge(holders_algorithm_ranks, get(paste0("holders_", alg))[, c(cols_to_keep), with = F], by = c("company_number", "participant_id"), all.x = T, all.y = T, sort = F) setnames(holders_algorithm_ranks, "rank", paste0("rank_", alg)) setnames(holders_algorithm_ranks, "rankbygraphtype", paste0("rankbygraphtype_", alg), skip_absent = T) message(alg) } # Add the participant type in graph holders_algorithm_ranks[, type := graph_membership[ match(holders_algorithm_ranks$participant_id, graph_membership$participant_id)]$type ] # What rank do the actual super-holders from the affiliated entities evaluation data # take under each algorithm holders_algorithm_ranks_affiliated <- holders_algorithm_ranks[ company_number %in% affiliated_entities$company_number] # Add the ground truth participant id holders_algorithm_ranks_affiliated <- merge(holders_algorithm_ranks_affiliated, affiliated_entities[, -"path_length"], by = c("company_number"), all = T, sort = F) # NB: We have duplicates: # holders_algorithm_ranks_affiliated[ company_number == "00906355"] # Debug: #holders_algorithm_ranks[ company_number == "00027883"] #holders_algorithm_ranks_affiliated[ company_number == "00027883"] # Add the path lengths to the data holders_algorithm_ranks_affiliated <- merge(holders_algorithm_ranks_affiliated, affiliated_entities, by.x = c("company_number", "participant_id.y"), by.y = c("company_number", "participant_id"), sort = F, all.x = T, all.y = F) # What rank does the truth take under each algorithm algorithms <- unique(c("baseline", algorithms)) for(alg in algorithms) { holders_algorithm_ranks_affiliated[, paste0("rank_true_", alg) := NA_real_ ] holders_algorithm_ranks_affiliated[participant_id.x == participant_id.y, paste0("rank_true_", alg) := get(paste0("rank_", alg)) ] # Add the rank when we consider only super-holders if( !grepl("baseline", alg) ) { holders_algorithm_ranks_affiliated[ participant_id.x == participant_id.y & type == "SH", paste0("rankonlysuperholder_true_", alg) := get(paste0("rankbygraphtype_", alg))] # Restore the original value if no rank for super-holder holders_algorithm_ranks_affiliated[ is.na(get(paste0("rankonlysuperholder_true_", alg))) , paste0("rankonlysuperholder_true_", alg) := get(paste0("rank_true_", alg)) ] } } # Aggregate at company level cols_of_interest <- names(holders_algorithm_ranks_affiliated)[ grepl("rank.*_true_", names(holders_algorithm_ranks_affiliated))] cols_of_interest <- c(cols_of_interest, "path_length") affiliated_evaluation_results <- holders_algorithm_ranks_affiliated[, lapply(.SD, min, na.rm = T), .SDcols = cols_of_interest, by = "company_number"] affiliated_evaluation_results[, c(cols_of_interest) := lapply(.SD, function(x) ifelse(is.finite(x), x, NA)), .SDcols = cols_of_interest] # With the above min-aggregation we resolve the issue with duplicates # affiliated_evaluation_results[ company_number == "00906355"] # Compute recall @ different k for algorithms ## Init an object to store the recall algorithm_recall <- data.table() ## Init an object to store the recall at different path lengths algorithm_recall_by_pathlength <- data.table() path_lengths <- unique(holders_algorithm_ranks_affiliated$path_length) path_lengths <- path_lengths[ order(path_lengths) ] path_lengths <- path_lengths[ !is.na(path_lengths) ] for(k in 1:10) { for( alg in algorithms ) { # How many organisations have the true participant out of all k participants recall <- nrow(affiliated_evaluation_results[ get(paste0("rank_true_", alg)) <= k ])/nrow(affiliated_evaluation_results) algorithm_recall <- rbind(algorithm_recall, data.table(algorithm = alg, k = k, superholderpriority = 0, recall = recall), fill = T) # Special recall considering only super-holders if( !grepl("baseline", alg) ) { recallsh <- nrow(affiliated_evaluation_results[ get(paste0("rankonlysuperholder_true_", alg)) <= k ])/nrow(affiliated_evaluation_results) algorithm_recall <- rbind(algorithm_recall, data.table(algorithm = alg, k = k, superholderpriority = 1, recall = recallsh), fill = T) } # Same for each path lengths for( p in path_lengths ) { recallpath <- nrow(affiliated_evaluation_results[ get(paste0("rank_true_", alg)) <= k & path_length == p ])/nrow(affiliated_evaluation_results[ path_length == p | is.na(path_length) ]) algorithm_recall_by_pathlength <- rbind(algorithm_recall_by_pathlength, data.table(algorithm = alg, k = k, superholderpriority = 0, path = p, recall = recallpath), fill = T) if( !grepl("baseline", alg) ) { recallpathsh <- nrow(affiliated_evaluation_results[ get(paste0("rankonlysuperholder_true_", alg)) <= k & path_length == p ])/nrow(affiliated_evaluation_results[ path_length == p | is.na(path_length) ] ) algorithm_recall_by_pathlength <- rbind(algorithm_recall_by_pathlength, data.table(algorithm = alg, k = k, superholderpriority = 1, path = p, recall = recallpathsh), fill = T) } } } } # Save point fwrite(algorithm_recall, file = "output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall.csv") fwrite(algorithm_recall_by_pathlength, file = "output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall_by_pathlength.csv") # Manual inspection for tables #algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 ] #algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 3 ] #algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 5 ] #algorithm_recall[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 10 ] #algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 1 ] #algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 2 ] #algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 3 ] #algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 4 ] #algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 5 ] #algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 10 ] #algorithm_recall_by_pathlength[ algorithm %in% c("baseline", "dpi", "npi", "transitive_alpha0.0", "transitive_alpha0.999", "transitivedpi_alpha0.999") & k == 1 & path == 12 ] ############ # Plot the results library(data.table) library(ggplot2) library(ggthemes) library(ggrepel) library(stringi) library(showtext) setwd(Sys.getenv('ALPHAICON_PATH')) # Add the font to use font_add_google("Open Sans", "Open Sans") showtext_auto() # Load the files with evaluation results algorithm_recall <- fread("output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall.csv", encoding = "UTF-8") algorithm_recall_by_pathlength <- fread("output/alphaicon_paper/uk_orgs_algorithm_evaluation_recall_by_pathlength.csv", encoding = "UTF-8") for(obj in c("algorithm_recall", "algorithm_recall_by_pathlength") ) { # Column marking whether we use DPI weights get(obj)[, dpi_weights := 0] get(obj)[grepl("dpi", algorithm, ignore.case = T), dpi_weights := 1] # Shorter names get(obj)[, algorithm := gsub("transitive.*_alpha", "α=", algorithm)] get(obj)[, algorithm := gsub("transitivedpi_alpha", "α=", algorithm)] # Extract alpha values from the name get(obj)[, alpha := as.numeric(stri_split_fixed(algorithm, "α=", n = 2, simplify = T)[,2]) ] # Add the superholder mark to corresponding algorithm names get(obj)[ superholderpriority == 1, algorithm := paste0(algorithm, " SH")] # Add the DPI weight mark to corresponding algorithm names get(obj)[ dpi_weights == 1 & grepl("α", algorithm), algorithm := paste0(algorithm, " DPI")] # Certain names to upper case get(obj)[, algorithm := gsub("dpi", "DPI", algorithm) ] get(obj)[, algorithm := gsub("npi", "NPI", algorithm) ] } ######## # Produce the evaluation plots (overall) # One subset for display: only the best performing α-ICON versus other algorithms algorithm_recall_otheralgs <- algorithm_recall[ algorithm %in% c("DPI", "NPI", "α=0.999", "α=0.999 SH")] ## Other algorithms vs best and worst-performing α-ICON algorithm recall_plot <- ggplot(aes(x = k, y = recall, group = algorithm, color = algorithm), data = algorithm_recall_otheralgs) + geom_line(size = 1.5, alpha = 1) + geom_point(size = 2) + scale_x_continuous(breaks = 1:10) + geom_text(aes(label = algorithm, colour = algorithm, x = k-1, y = recall), data = algorithm_recall_otheralgs[k == 1 & algorithm == "DPI"], size = 8, hjust = 0.3) + geom_text(aes(label = algorithm, colour = algorithm, x = k-1, y = recall), data = algorithm_recall_otheralgs[k == 1 & algorithm == "α=0.999"], size = 8, hjust = 0.7) + geom_text(aes(label = algorithm, colour = algorithm, x = k-1, y = recall), data = algorithm_recall_otheralgs[k == 1 & algorithm == "NPI"], size = 8, hjust = 0.3) + geom_text(aes(label = algorithm, colour = algorithm, x = k-1, y = recall), data = algorithm_recall_otheralgs[k == 1 & algorithm == "α=0.999 SH"], size = 8, hjust = 0.5, vjust = -0.6) + #geom_text_repel(aes(label = algorithm, colour = algorithm, x = k-1, y = recall), data = algorithm_recall_otheralgs[k == 1], size = 8, hjust = 0.5) + labs(y = expression("Recall @ "~italic("k")), x = expression(italic("k"))) + #scale_colour_brewer(type = "qual", palette = 2) + scale_color_manual(name = "", values = c("#b2df8a", "#33a02c", "#a6cee3", "#1f78b4")) + expand_limits(x = c(-1.5, 10)) + theme_minimal() + theme(legend.position = "none", text = element_text(size = 18, family = "Open Sans"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) ggsave(recall_plot, file = "output/alphaicon_paper/recall_plot.pdf", width = 12, height = 9, device = cairo_pdf, scale = 0.6) # Another subset for display: only α-ICON under different alphas algorithm_recall_onlyalphas <- algorithm_recall[ alpha > 0 & superholderpriority == 1 & dpi_weights == 0] # Group the duplicated values of recall at alphas alpha_group <- algorithm_recall_onlyalphas[ k == 1, list(alphagroup = .GRP), by = "recall"] alpha_group <- merge(alpha_group, algorithm_recall_onlyalphas[ k == 1, c("alpha", "recall")], by = "recall") alpha_group[, recall := NULL] # Generate the label text for alpha group alpha_group[, minalpha := min(alpha), by = "alphagroup"] alpha_group[, maxalpha := max(alpha), by = "alphagroup"] alpha_group[ minalpha == maxalpha, alphagrouplabel := as.character(round(alpha, 1))] alpha_group[ minalpha != maxalpha, alphagrouplabel := as.character(paste0(round(minalpha, 1), "-", round(maxalpha, 3)))] alpha_group[, alphagrouplabel := paste0("α=", alphagrouplabel)] algorithm_recall_onlyalphas[, alphagroup := alpha_group[match(algorithm_recall_onlyalphas$alpha, alpha_group$alpha)]$alphagrouplabel] # Make alpha groups unique algorithm_recall_onlyalphas <- unique(algorithm_recall_onlyalphas, by = c("alphagroup", "k")) ## Other algorithms vs best and worst-performing transitive algorithm recall_plot_alphas <- ggplot(aes(x = k, y = recall, group = alphagroup, color = as.factor(alpha)), data = unique(algorithm_recall_onlyalphas)) + geom_line(size = 1.5, alpha = 1) + geom_point(size = 2) + scale_x_continuous(breaks = 1:10) + geom_text(aes(label = alphagroup, colour = as.factor(alpha), group = alphagroup, x = k-2.5, y = recall), size = 8, data = algorithm_recall_onlyalphas[k == 4 & alpha == 0.1 ], hjust = 0.3) + geom_text(aes(label = alphagroup, colour = as.factor(alpha), group = alphagroup, x = k-2.5, y = recall), size = 8, data = algorithm_recall_onlyalphas[k == 2 & alpha == 0.5 ], vjust = -4, hjust = 0.1) + geom_text(aes(label = alphagroup, colour = as.factor(alpha), group = alphagroup, x = k-2.5, y = recall), size = 8, data = algorithm_recall_onlyalphas[k == 3 & alpha == 0.7 ], hjust = -0.1) + geom_text(aes(label = alphagroup, colour = as.factor(alpha), group = alphagroup, x = k-2.5, y = recall), size = 8, data = algorithm_recall_onlyalphas[k == 1 & alpha == 0.8 ], vjust = 1, hjust = -0.1) + geom_text(aes(label = alphagroup, colour = as.factor(alpha), group = alphagroup, x = k-2.5, y = recall), size = 8, data = algorithm_recall_onlyalphas[k == 2 & alpha == 0.9 ], hjust = 0.4) + #geom_text_repel(aes(label = alphagroup, colour = as.factor(alpha), group = alphagroup, x = k-2.5, y = recall), size = 8, data = algorithm_recall_onlyalphas[k == 1], hjust = 0, min.segment.length = 10) + scale_colour_brewer(type = "qual", palette = 2) + labs(y = expression("Recall @ "~italic("k")), x = expression(italic("k"))) + expand_limits(x = c(-2, 10)) + theme_minimal() + theme(legend.position = "none", text = element_text(size = 18, family = "Open Sans"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) ggsave(recall_plot_alphas, file = "output/alphaicon_paper/recall_plot_alphas.pdf", width = 12, height = 9, device = cairo_pdf, scale = 0.6) # For reporting: table with recall algorithms_to_report <- c("baseline", "DPI", "NPI", "NPI SH", "α=0.0", "α=0.999", "α=0.999 SH", "α=0.999 SH DPI") algorithm_recall[, recall := round(recall, 3) ] algorithm_recall[ k == 1 & algorithm %in% algorithms_to_report, c("algorithm", "recall")] algorithm_recall[ k == 3 & algorithm %in% algorithms_to_report, c("algorithm", "recall")] algorithm_recall[ k == 5 & algorithm %in% algorithms_to_report, c("algorithm", "recall")] algorithm_recall[ k == 10 & algorithm %in% algorithms_to_report, c("algorithm", "recall")] ######## # Produce the evaluation plots (by path length) # One subset for display: only the best performing α-ICON versus other algorithms algorithm_recall_by_pathlength_otheralgs <- algorithm_recall_by_pathlength[ algorithm %in% c("NPI", "α=0.999", "α=0.999 SH") ] ## Other algorithms vs best and worst-performing α-ICON algorithm recall_path_plot <- ggplot(aes(x = path, y = recall, group = algorithm, color = algorithm), data = algorithm_recall_by_pathlength_otheralgs[ k == 1 ]) + geom_line(size = 1.5, alpha = 1) + geom_point(size = 2) + scale_x_continuous(breaks = 1:12) + scale_y_continuous(limits = c(0, 1)) + labs(y = "Recall @ 1 for parents within path", x = expression(italic("l")~", path length from subsidiary to parent")) + #scale_colour_brewer(type = "qual", palette = 3) + scale_color_manual(name = "", values = c("#b2df8a", "#33a02c", "#1f78b4")) + expand_limits(x = c(1, 12)) + theme_minimal() + theme(legend.position = "bottom", text = element_text(size = 18, family = "Open Sans"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) ggsave(recall_path_plot, file = "output/alphaicon_paper/recall_path_plot_at_1.pdf", width = 12, height = 9, device = cairo_pdf, scale = 0.6) # For reporting: table with recall algorithm_recall_by_pathlength[, recall := round(recall, 3) ] algorithms_path_length_to_report <- c("baseline", "DPI", "NPI", "NPI SH", "α=0.0", "α=0.999", "α=0.999 SH", "α=0.999 SH DPI") algorithm_recall_by_pathlength[ path == 1 & algorithm %in% algorithms_path_length_to_report & k == 1, c("algorithm", "recall") ] algorithm_recall_by_pathlength[ path == 2 & algorithm %in% algorithms_path_length_to_report & k == 1, c("algorithm", "recall") ] algorithm_recall_by_pathlength[ path == 3 & algorithm %in% algorithms_path_length_to_report & k == 1, c("algorithm", "recall") ] algorithm_recall_by_pathlength[ path == 4 & algorithm %in% algorithms_path_length_to_report & k == 1, c("algorithm", "recall") ] algorithm_recall_by_pathlength[ path == 5 & algorithm %in% algorithms_path_length_to_report & k == 1, c("algorithm", "recall") ] algorithm_recall_by_pathlength[ path == 10 & algorithm %in% algorithms_path_length_to_report & k == 1, c("algorithm", "recall") ] algorithm_recall_by_pathlength[ path == 12 & algorithm %in% algorithms_path_length_to_report & k == 1, c("algorithm", "recall") ]