# This code sums the computed control shares # of the super-holders in the PSC data and # produces the rankings under different algorithms # for the α-ICON paper library(data.table) library(lubridate) library(stringi) library(stringr) library(stargazer) # Declare working directory beforehand in an environment variable # ALPHAICON_PATH = "path_to_your_folder" # with the aid of usethis::edit_r_environ() # Restart R session for the changes to make effect setwd(Sys.getenv('ALPHAICON_PATH')) ############ # Data loading # Load the active participants snapshot of PSC (prepared by data_preparation/uk/2_psc_snapshot_to_participants_panel.r) psc <- fread("output/uk/uk_organisations_participants_2021_long_2aug21.csv", integer64 = "character", na.strings = "", encoding = "UTF-8") # Load the companies data (prepared by data_preparation/uk/1b_process_companies_data.r) load("data/uk/uk_basic_companies_data_2021-08-01.rdata") # Load the DPI of each participant after 10000 iterations (computed by alphaicon_paper/2_compute_npi_dpi.r) holders_dpi <- fread("output/uk/npi_dpi/10000iter/uk_organisations_participants_2021_long_7sep21_dpi_10000iter.csv", integer64 = "character", na.strings = "", encoding = "UTF-8") setnames(holders_dpi, c("entity", "participant", "dpi"), c("company_number", "participant_id", "share")) holders_dpi[, times_pivotal := NULL ] # Load the NPI of each participant after 10000 iterations (computed by alphaicon_paper/2_compute_npi_dpi.r) holders_npi <- fread("output/uk/npi_dpi/10000iter/uk_organisations_participants_2021_long_7sep21_npi_10000iter.csv", integer64 = "character", na.strings = "", encoding = "UTF-8") setnames(holders_npi, c("entity", "participant", "npi"), c("company_number", "participant_id", "share")) holders_npi[, times_pivotal := NULL ] # Load the transitive ownership data at various alpha # (prepared by alphaicon_paper/1_compute_alphaicon.ipynb) alphas <- c("0.0", "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "0.9", "0.999") for( a in alphas) { temp <- fread(paste0("output/uk/transitive/uk_organisations_transitive_ownership_alpha", a, "_2021_long_2aug21.csv"), integer64 = "character", na.strings = "", encoding = "UTF-8", colClasses = c("character", "character", "numeric")) assign(paste0("holders_transitive_alpha", a), temp) rm(temp) } # Load the data on graph membership # (prepared by alphaicon_paper/1_compute_alphaicon.ipynb) graph_membership <- fread("output/uk/uk_organisations_participation_graph_core_periphery_membership_6aug21.csv", integer64 = "character", na.strings = "", encoding = "UTF-8", header = T, colClasses = c("numeric", "character", "factor")) graph_membership[, V1 := NULL ] setnames(graph_membership, "company_number/id", "participant_id") # All holder objects that are loaded in the memory holder_objects <- paste0("holders_", c("dpi", "npi", paste0("transitive_alpha", alphas))) # Add the graph membership and remove self-loops for(obj in holder_objects) { get(obj)[, participant_type := graph_membership[ match(get(obj)$participant_id, graph_membership$participant_id) ]$type ] get(obj)[, company_type := graph_membership[ match(get(obj)$company_number, graph_membership$participant_id) ]$type ] assign(obj, get(obj)[ company_number != participant_id ]) } psc[, participant_type := graph_membership[ match(psc$participant_id, graph_membership$participant_id) ]$type ] psc[, company_type := graph_membership[ match(psc$company_number, graph_membership$participant_id) ]$type ] ############ # Identify top holders # Compute total holdings (weighted and unweighted by assets) holdings_dpi <- holders_dpi[, list( sumdpi = sum(share)), by = "participant_id"] holdings_dpi[, rankdpi := frankv(sumdpi, order = -1, ties.method = "first")] holdings_npi <- holders_npi[, list( sumnpi = sum(share)), by = "participant_id"] holdings_npi[, ranknpi := frankv(sumnpi, order = -1, ties.method = "first")] # Normalize shares by type to one for our transitive data holders_transitive_alpha0.999[, share_typenormalized := share/sum(share), by = c("company_number", "participant_type")] # Keep only super-holders and sum up their shares holdings_transitive <- holders_transitive_alpha0.999[participant_type == "SH", list( sumtransitive = sum(share_typenormalized)), by = "participant_id"] # Rank the holders by their share holdings_transitive[, ranktransitive := frankv(sumtransitive, order = -1, ties.method = "first")] # Top-100 participants by our metric compared to other metrics top_holders <- setorderv(holdings_transitive, "ranktransitive", 1)[1:100] # Add ranks from other algorithms top_holders <- merge(top_holders, holdings_dpi, by = "participant_id", all.x = T, all.y = F, sort = F) top_holders <- merge(top_holders, holdings_npi, by = "participant_id", all.x = T, all.y = F, sort = F) # Proper names ## First, match on company number with companies data top_holders[, participant_name := uk_basic_companies_data[ match(top_holders$participant_id, uk_basic_companies_data$CompanyNumber)]$CompanyName ] ## Second, extract the company name top_holders[ str_count(participant_id, fixed("$")) == 2, participant_name := stri_split_fixed(participant_id, "$", n = 3, simplify = T)[,1] ] ## Third, add the first and last names of the persons top_holders[ str_count(participant_id, fixed("$")) == 4, participant_name := paste0(stri_split_fixed(participant_id, "$", n = 4, simplify = T)[, 1], " ", stri_split_fixed(participant_id, "$", n = 4, simplify = T)[, 3]) ] ## We no longer need participant id #top_holders[, participant_id := NULL ] # Proper column order setcolorder(top_holders, c("participant_name", "participant_id", "ranktransitive", "ranknpi", "rankdpi", "sumtransitive", "sumnpi", "sumdpi")) # Round the score top_holders[, c("sumtransitive", "sumnpi", "sumdpi") := lapply(.SD, round, 1), .SDcols = c("sumtransitive", "sumnpi", "sumdpi") ] # Add the size of the network of holdings and the core-shell distribution top_holders_network_chars <- data.table() for(superholder_id in top_holders$participant_id) { # Debug: superholder_id <- "SC095000" # Ultimate companies held by this organization holders_subset_ultimate <- holders_transitive_alpha0.999[ participant_id %in% superholder_id ] # Consider their total interest in all entities holders_subset <- psc[ participant_id %in% unique(holders_subset_ultimate$company_number) | company_number %in% unique(holders_subset_ultimate$company_number) ] holders_subset <- holders_subset[ !is.na(equity_share)] holders_subset <- holders_subset[ participant_id != company_number] # Share to 0-1 range setnames(holders_subset, "equity_share", "share") holders_subset[, share := share/100] ## Create an object with vertex metadata vertex_metadata <- unique(rbind(unique(data.table( name = holders_subset$company_number, type = holders_subset$company_type)), data.table( name = holders_subset$participant_id, type = holders_subset$participant_type) ), by = "name") # Count all vertex types out <- vertex_metadata[, .N, by = "type"] # Transpose out <- dcast(melt(out, id.vars = "type"), variable ~ type) out[, variable := NULL ] # Keep only the columns of interest in the order of interest types_of_interest <- c("SH", "ST", "I", "C") names_outside_types_of_interest <- types_of_interest[!(types_of_interest %in% names(out))] if( length(names_outside_types_of_interest) > 0) { out[, c(names_outside_types_of_interest) := 0] } out <- out[, types_of_interest, with = F] # Add the network size out <- data.table(participant_id = superholder_id, nodes = nrow(vertex_metadata), out) top_holders_network_chars <- rbind(top_holders_network_chars, out, fill = T) message(superholder_id) } # Add to the top holders data top_holders <- merge(top_holders, top_holders_network_chars, by = "participant_id", sort = F) # Write to CSV fwrite(top_holders, file = "output/alphaicon_paper/uk_organisations_top100_holders_2021_long_2aug21.csv") # Produce a TeX table #stargazer(top_holders[1:25, -"participant_id"], type = "latex", summary = F, out = "output/alphaicon_paper/uk_organisations_top25_holders_2021_long_2aug21.tex", float = F, multicolumn = T, digits = 1) # Export the information on top-25 entities with the largest deviation between ranks combined_holdings <- merge(holdings_dpi, holdings_npi, by = "participant_id", all = T) combined_holdings <- merge(combined_holdings, holdings_transitive, by = "participant_id", all = T) combined_holdings[, diff_npi_dpi := sumnpi - sumdpi ] combined_holdings[, diff_transitive_dpi := sumtransitive - sumdpi ] combined_holdings[, diff_transitive_npi := sumtransitive - sumnpi ] top100_holders_diff_npi_dpi <- setorderv(combined_holdings, "diff_npi_dpi", -1, na.last = T)[1:100] top100_holders_diff_transitive_dpi <- setorderv(combined_holdings, "diff_transitive_dpi", -1, na.last = T)[1:100] top100_holders_diff_transitive_npi <- setorderv(combined_holdings, "diff_transitive_npi", -1, na.last = T)[1:100] # Export point #fwrite(top100_holders_diff_npi_dpi, file = "output/alphaicon_paper/uk_organisations_top100_holders_diff_npi_dpi_2021_long_2aug21.csv") #fwrite(top100_holders_diff_transitive_dpi, file = "output/alphaicon_paper/uk_organisations_top100_holders_diff_transitive_dpi_2021_long_2aug21.csv") #fwrite(top100_holders_diff_transitive_npi, file = "output/alphaicon_paper/uk_organisations_top100_holders_diff_transitive_npi_2021_long_2aug21.csv") ############ # Agreement between rankings # Add the ranks (takes time) for(obj in holder_objects) { # Create temporary variable prioritising super-holders if( grepl("transitive", obj) ) { get(obj)[, superholder := 0] get(obj)[participant_type == "SH", superholder := 1] } # Sort by descending share within companies if( grepl("transitive", obj) ) { setorderv(get(obj), c("company_number", "superholder", "share"), c(1, -1, -1)) } else { setorderv(get(obj), c("company_number", "share"), c(1, -1)) } # Add the rank that prioritises super-holders get(obj)[, rank := 1:.N, by = c("company_number")] message(obj) } # Combine the ranks from all the methods vars_to_keep <- c("company_number", "participant_id", "rank") ## DPI and NPI holders_rankings <- merge(holders_dpi[, c(vars_to_keep), with = F], holders_npi[, c(vars_to_keep), with = F], by = c("company_number", "participant_id"), all = T) setnames(holders_rankings, c("rank.x", "rank.y"), c("rank_dpi", "rank_npi")) ## Add transitive holder hankings (only super-holders or top-ranking non-super-holders) for( a in alphas) { holders_rankings <- merge(holders_rankings, get(paste0("holders_transitive_alpha", a))[ participant_type == "SH" | (participant_type != "SH" & rank == 1), c(vars_to_keep), with = F], by = c("company_number", "participant_id"), all = T) setnames(holders_rankings, c("rank"), paste0("rank_transitive", a)) } # Count holders by company from each method holders_rankings[, countholders_dpi := sum(!is.na(rank_dpi)), by = "company_number"] holders_rankings[, countholders_npi := sum(!is.na(rank_npi)), by = "company_number"] for( a in alphas) { holders_rankings[, paste0("countholders_transitive", a) := sum(!is.na(get(paste0("rank_transitive", a)))), by = "company_number"] message(a) } # Pairs of counts of holders by company holders_rankings[, countholders_dpi_npi := sum(!is.na(rank_dpi) & !is.na(rank_npi)), by = "company_number"] for( a in alphas) { holders_rankings[, paste0("countholders_dpi_transitive", a) := sum(!is.na(rank_dpi) & !is.na(get(paste0("rank_transitive", a)))), by = "company_number"] holders_rankings[, paste0("countholders_npi_transitive", a) := sum(!is.na(rank_npi) & !is.na(get(paste0("rank_transitive", a)))), by = "company_number"] message(a) } ## Compute company-level Kendal's tau-b rank correlation coefficient ### (takes about 15 minutes per pair) tau_dpi_npi <- holders_rankings[ countholders_dpi_npi > 1 & !is.na(rank_dpi) & !is.na(rank_npi), list(kendalltau_dpi_npi = cor(rank_dpi, rank_npi, method = "kendall", use = "pairwise.complete.obs")), by = "company_number"] for( a in alphas) { eval(parse(text = paste0("tau_dpi_transitive", a, " <- holders_rankings[ countholders_dpi_transitive", a, " > 1 & !is.na(rank_dpi) & !is.na(rank_transitive", a, "), list(kendalltau_dpi_transitive", a, " = cor(rank_dpi, rank_transitive", a, ", method = 'kendall', use = 'pairwise.complete.obs')), by = 'company_number']"))) eval(parse(text = paste0("tau_npi_transitive", a, " <- holders_rankings[ countholders_npi_transitive", a, " > 1 & !is.na(rank_npi) & !is.na(rank_transitive", a, "), list(kendalltau_npi_transitive", a, " = cor(rank_npi, rank_transitive", a, ", method = 'kendall', use = 'pairwise.complete.obs')), by = 'company_number']"))) message(a) } # Examples of companies with complex rankings #holders_rankings[ company_number == "00026306"] #holders_rankings[ company_number == "00000529"] #holders_rankings[ company_number == "00018751"] # Debug: these should be zeros #holders_rankings[ countholders_dpi == 1 & countholders_npi == 1 & rank_dpi != rank_npi] #holders_rankings[ countholders_transitive0.999 == 1 & countholders_npi == 1 & rank_transitive0.999 != rank_npi] # Add the tau-b = 1 for companies with one participant (both direct and indirect) tau_dpi_npi <- rbind(tau_dpi_npi, data.table(company_number = unique(holders_rankings[ countholders_dpi == 1 & countholders_npi == 1 & !is.na(rank_dpi) & !is.na(rank_npi) ]$company_number), kendalltau_dpi_npi = 1)) for( a in alphas) { eval(parse(text = paste0("tau_dpi_transitive", a, " <- rbind(tau_dpi_transitive", a, ", data.table( company_number = unique(holders_rankings[ countholders_dpi == 1 & countholders_transitive", a, " == 1 & !is.na(rank_dpi) & !is.na(rank_transitive", a, ")]$company_number), kendalltau_dpi_transitive", a, " = 1))"))) eval(parse(text = paste0("tau_npi_transitive", a, " <- rbind(tau_npi_transitive", a, ", data.table( company_number = unique(holders_rankings[ countholders_npi == 1 & countholders_transitive", a, " == 1 & !is.na(rank_npi) & !is.na(rank_transitive", a, ")]$company_number), kendalltau_npi_transitive", a, " = 1))"))) } # Add the tau-b = -1 for companies with differing number of participants # where ranks do not match. Example: holders_rankings[ company_number == "00026306"] tau_dpi_npi <- rbind(tau_dpi_npi, data.table(company_number = unique(holders_rankings[ !(company_number %in% tau_dpi_npi$company_number)]$company_number), kendalltau_dpi_npi = -1)) for( a in alphas) { eval(parse(text = paste0("tau_dpi_transitive", a, " <- rbind(tau_dpi_transitive", a, ", data.table( company_number = unique(holders_rankings[ !(company_number %in% tau_dpi_transitive", a, "$company_number) ]$company_number), kendalltau_dpi_transitive", a, " = -1))"))) eval(parse(text = paste0("tau_npi_transitive", a, " <- rbind(tau_npi_transitive", a, ", data.table( company_number = unique(holders_rankings[ !(company_number %in% tau_npi_transitive", a, "$company_number) ]$company_number), kendalltau_npi_transitive", a, " = -1))"))) } # Finally, we need to fill in NAs like tau_dpi_transitive0.999[company_number == "00001615"] # holders_rankings[ company_number == "00001615" ] # holders_rankings[ company_number == "SO307326" ] # holders_rankings[ company_number == "00000529" ] # Merge the taus for( a in alphas) { if( which(alphas == a) == 1 ) { kendall_taus_participant_ranks <- merge(tau_dpi_npi, get(paste0("tau_dpi_transitive", a)), by = "company_number", all = T) kendall_taus_participant_ranks <- merge(kendall_taus_participant_ranks, get(paste0("tau_npi_transitive", a)), by = "company_number", all = T) } else { kendall_taus_participant_ranks <- merge(kendall_taus_participant_ranks, get(paste0("tau_dpi_transitive", a)), by = "company_number", all = T) kendall_taus_participant_ranks <- merge(kendall_taus_participant_ranks, get(paste0("tau_npi_transitive", a)), by = "company_number", all = T) } message(a) } # Add the information on the count of holders kendall_taus_participant_ranks <- merge(kendall_taus_participant_ranks, unique(holders_rankings[, c("company_number", "countholders_dpi", "countholders_npi", paste0("countholders_transitive", alphas)), with = F], by = "company_number"), by = "company_number", all.x = T, all.y = F) # Save point fwrite(kendall_taus_participant_ranks, file = "output/alphaicon_paper/kendall_taus_participant_ranks_dpi_npi_transitive_uk_organisations_participants_2021_7sep21.csv") ############ # Analyse the agreement between rankings library(data.table) library(ggplot2) library(ggthemes) library(ggrepel) library(stringi) library(stringr) library(stargazer) library(showtext) setwd(Sys.getenv('ALPHAICON_PATH')) # Add the font to use font_add_google("Open Sans", "Open Sans") showtext_auto() # Load the produced CSV with Kendall's tau-b taus_longs <- fread("output/alphaicon_paper/kendall_taus_participant_ranks_dpi_npi_transitive_uk_organisations_participants_2021_7sep21.csv", encoding = "UTF-8") # To long form (each line is method and alpha) taus <- melt(taus_longs, id.vars = "company_number", measure.vars = names(taus_longs)[grepl("kendalltau", names(taus_longs))], variable.name = "pair", value.name = "tau", na.rm = T, variable.factor = T) countholders <- melt(taus_longs, id.vars = "company_number", measure.vars = names(taus_longs)[grepl("countholders", names(taus_longs))], variable.name = "algorithm", value.name = "countholders", na.rm = T, variable.factor = T) # Convert pairs to extract useful information alg_pairs <- data.table(pair = levels(taus$pair)) alg_pairs[, pair_modif := gsub("kendalltau_", "", pair, fixed = T)] alg_pairs[, c("algorithm1", "algorithm2") := as.data.table(stri_split_fixed(pair_modif, "_", n = 2, simplify = T))] alg_pairs[, pair_modif := NULL] alg_pairs[, pair := factor(pair, levels = levels(taus$pair))] alg_pairs[, alpha := NA_real_ ] alg_pairs[ grepl("transitive", algorithm2), alpha := as.numeric(gsub("transitive", "", algorithm2))] alg_pairs[ grepl("transitive", algorithm2), algorithm2 := gsub("[^transitive]", "", algorithm2)] alg_pairs[, algorithm1 := factor(algorithm1, levels = c("dpi", "npi", "transitive"))] alg_pairs[, algorithm2 := factor(algorithm2, levels = c("dpi", "npi", "transitive"))] taus <- merge(taus, alg_pairs, by = "pair") taus[, pair := NULL] gc() # Same for countholders alg_counts <- data.table(algorithm = levels(countholders$algorithm)) alg_counts[, alg_modif := gsub("countholders_", "", algorithm)] alg_counts[ grepl("transitive", alg_modif), alpha := as.numeric(gsub("transitive", "", alg_modif))] alg_counts[ grepl("transitive", alg_modif), alg_modif := gsub("[^transitive]", "", alg_modif)] alg_counts[, alg_modif := factor(alg_modif, levels = c("dpi", "npi", "transitive"))] countholders <- merge(countholders, alg_counts, by = "algorithm") countholders[, algorithm := NULL] setnames(countholders, "alg_modif", "algorithm") gc() # Add count holders to tau #taus <- merge(taus, countholders, by.x = c("algorithm1", "alpha"), by.y = c("algorithm", "alpha")) #setnames(taus, "countholders", "countholders1") #taus <- merge(taus, countholders, by.x = c("algorithm2", "alpha"), by.y = c("algorithm", "alpha")) #setnames(taus, "countholders", "countholders2") # Add entity industry to the data (peak use of 40 GB) taus[, industrysection := uk_basic_companies_data[ match(taus$company_number, uk_basic_companies_data$CompanyNumber)]$industrysection_1 ] gc() # Summary statistics of Kendall's tau-b ## Mean, overall taus_overall_mean <- taus[!is.na(tau), list(tau = mean(tau), tausemean = sd(tau)/sqrt(.N)), by = c("algorithm1", "algorithm2", "alpha")] ## Mean, by industry taus_industry_mean <- taus[!is.na(tau) & !is.na(industrysection), list( tau = mean(tau), tausemean = sd(tau)/sqrt(.N)), by = c("algorithm1", "algorithm2", "alpha", "industrysection")] # Export point #fwrite(taus_overall_mean, file = "output/alphaicon_paper/mean_kendall_taus_participant_ranks_dpi_npi_transitive_uk_organisations_participants_2021_7sep21.csv") #fwrite(taus_industry_mean, file = "output/alphaicon_paper/mean_by_industrysection_kendall_taus_participant_ranks_dpi_npi_transitive_uk_organisations_participants_2021_7sep21.csv") # Produce the plot with overall mean taus_overall_mean[ grepl("dpi|npi", algorithm1), algorithm1 := toupper(algorithm1)] ## Other algorithms vs best and worst-performing transitive algorithm taus_mean_plot <- ggplot(aes(x = alpha, y = tau, group = algorithm1, color = algorithm1), data = taus_overall_mean[ algorithm1 == "NPI" & algorithm2 == "transitive"]) + geom_line(size = 1.5, alpha = 1) + geom_point(size = 2) + geom_text(aes(label = round(tau, 4)), size = 2.5, nudge_y = 0.0005) + scale_x_continuous( breaks = c(seq(0, 0.9, 0.1), 0.999), labels = c(seq(0, 0.9, 0.1), 0.999), guide = guide_axis(n.dodge = 1)) + #facet_wrap(. ~ algorithm1, scales = "free_y")+ labs(y = expression("Kendall's "~tau~"-b betw. NPI\nand "~alpha~"-ICON SH ranks"), x = expression(alpha)) + scale_colour_brewer(type = "qual", palette = 2) + theme_minimal() + theme(legend.position = "none", text = element_text(size = 14, family = "Open Sans"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) #ggsave(taus_mean_plot, file = "output/alphaicon_paper/taus_mean_plot.pdf", width = 8.5, height = 4, device = cairo_pdf, scale = 1) # Export to a table #stargazer(taus_overall_mean[ algorithm1 == "NPI" & algorithm2 == "transitive"], type = "latex", summary = F, out = "output/alphaicon_paper/taus_overall_mean.tex", float = F, multicolumn = T, digits = 5)