258 lines
15 KiB
R
258 lines
15 KiB
R
# This code plots the selected sub-networks
|
||
# in the PSC data and produces the figures
|
||
# for the α-ICON paper
|
||
library(data.table)
|
||
library(igraph)
|
||
library(stringi)
|
||
library(stringr)
|
||
library(ggplot2)
|
||
library(ggnetwork)
|
||
library(ggthemes)
|
||
library(showtext)
|
||
|
||
# Add the font to use
|
||
font_add_google("Open Sans", "Open Sans")
|
||
showtext_auto()
|
||
|
||
# Declare working directory beforehand in an environment variable
|
||
# ALPHAICON_PATH = "path_to_your_folder"
|
||
# with the aid of usethis::edit_r_environ()
|
||
# Restart R session for the changes to make effect
|
||
setwd(Sys.getenv('ALPHAICON_PATH'))
|
||
|
||
# Load the active participants snapshot of PSC (prepared by data_preparation/uk/2_psc_snapshot_to_participants_panel.r)
|
||
psc <- fread("output/uk/uk_organisations_participants_2021_long_2aug21.csv", integer64 = "character", na.strings = "", encoding = "UTF-8")
|
||
|
||
# Load the companies data (prepared by data_preparation/uk/1b_process_companies_data.r)
|
||
load("data/uk/uk_basic_companies_data_2021-08-01.rdata")
|
||
|
||
# Load the transitive ownership of each participant at alpha = 0.999
|
||
# (prepared by alphaicon_paper/1_compute_alphaicon.ipynb)
|
||
holders <- fread("output/uk/transitive/uk_organisations_transitive_ownership_alpha0.999_2021_long_2aug21.csv", integer64 = "character", na.strings = "", encoding = "UTF-8", colClasses = c("character", "character", "numeric"))
|
||
|
||
# Load the data on graph membership
|
||
# (prepared by alphaicon_paper/1_compute_alphaicon.ipynb)
|
||
graph_membership <- fread("output/uk/uk_organisations_participation_graph_core_periphery_membership_6aug21.csv", integer64 = "character", na.strings = "", encoding = "UTF-8", header = T, colClasses = c("numeric", "character", "factor"))
|
||
graph_membership[, V1 := NULL ]
|
||
setnames(graph_membership, "company_number/id", "participant_id")
|
||
gc()
|
||
|
||
# We need to pick up an illustrative organization
|
||
|
||
# Ideally this organization should have a small network size
|
||
# and have at least one entity in graph core
|
||
|
||
# Orgs in the core with all other types in the subgraph
|
||
# and manageable network size
|
||
temp <- holders[ participant_id %in% graph_membership[ type == "C"]$participant_id ]
|
||
temp[, company_type := graph_membership[ match(temp$company_number, graph_membership$participant_id) ]$type ]
|
||
temp[, participant_type := graph_membership[ match(temp$participant_id, graph_membership$participant_id) ]$type ]
|
||
|
||
# Devise a set of candidate orgs for illustration
|
||
# where all organization types are present in the company network
|
||
candidate_orgs <- temp[, list( company_types = uniqueN(company_type), network_size = uniqueN(company_number)), by = "participant_id"]
|
||
|
||
# Consider moderately sized networks with all types of orgs there
|
||
candidate_ids <- unique(candidate_orgs[ company_types > 2 & network_size <= 30 & network_size > 10]$participant_id)
|
||
|
||
# Additionally, consider the largest structure in terms
|
||
# of graph path from participant to company
|
||
# This is the result of igraph::farthest_vertices
|
||
# applied to the psc graph
|
||
farthest_components <- c("11443992")
|
||
|
||
# Also consider participant SEAN DINNEN
|
||
# he has DPI and NPI of 0 but very large transitivity score
|
||
sean_dinnen <- "SEAN$MICHAEL$DINNEN$1968$3"
|
||
|
||
# Example of long path
|
||
heidelbergcement <- "HEIDELBERGCEMENT AG$GERMANY$HRB 33082"
|
||
|
||
# Finally, consider the organizations with large difference between
|
||
# the super-holder scores (computed in alphaicon_paper/6_rank_top_holders.r)
|
||
top100_holders_diff_npi_dpi <- fread("output/alphaicon_paper/uk_organisations_top100_holders_diff_npi_dpi_2021_long_2aug21.csv", encoding = "UTF-8")
|
||
top100_holders_diff_transitive_dpi <- fread("output/alphaicon_paper/uk_organisations_top100_holders_diff_transitive_dpi_2021_long_2aug21.csv", encoding = "UTF-8")
|
||
top100_holders_diff_transitive_npi <- fread("output/alphaicon_paper/uk_organisations_top100_holders_diff_transitive_npi_2021_long_2aug21.csv", encoding = "UTF-8")
|
||
|
||
# Define the candidates to consider (from affiliated_entities
|
||
# and other inspection)
|
||
organizations_of_interest <- unique(c("IHS MARKIT LTD$BERMUDA$48610", "06647317", "11594795", candidate_ids, farthest_components, sean_dinnen))
|
||
|
||
# Or specify one organization for the graph in the paper:
|
||
#organizations_of_interest <- "03782947" # OPUS
|
||
|
||
organizations_of_interest <- c(top100_holders_diff_transitive_dpi[1]$participant_id, # SPECSAVERS OPTICAL SUPERSTORES LTD
|
||
top100_holders_diff_npi_dpi[82]$participant_id, # LENDLEASE INTERNATIONAL PTY LIMITED
|
||
top100_holders_diff_transitive_dpi[99]$participant_id, # THE BERKELEY GROUP PLC
|
||
top100_holders_diff_transitive_npi[17]$participant_id) # BAJLINDER$KAUR$BOPARAN$1968$1. NB: her husband RANJIT$SINGH$BOPARAN$1966$8 has alpha-ICON sum 73
|
||
|
||
# Iterate over multiple graph candidates
|
||
for( org_interest in organizations_of_interest ) {
|
||
|
||
# Ultimate companies held by this organization
|
||
holders_subset_ultimate <- holders[ participant_id %in% org_interest ]
|
||
|
||
# Consider their total interest in all entities
|
||
holders_subset <- psc[ participant_id %in% unique(holders_subset_ultimate$company_number) | company_number %in% unique(holders_subset_ultimate$company_number) ]
|
||
holders_subset <- holders_subset[ !is.na(equity_share)]
|
||
setnames(holders_subset, "equity_share", "share")
|
||
|
||
# Network size
|
||
#uniqueN(holders_subset$company_number)
|
||
#uniqueN(holders_subset$participant_id)
|
||
|
||
# Add company and participant names and countries of origin
|
||
holders_subset[, company_name := uk_basic_companies_data[ match(holders_subset$company_number, uk_basic_companies_data$CompanyNumber)]$CompanyName ]
|
||
#holders_subset[, company_country := uk_basic_companies_data[ match(holders_subset$company_number, uk_basic_companies_data$CompanyNumber)]$CountryOfOrigin ]
|
||
|
||
holders_subset[, participant_name := uk_basic_companies_data[ match(holders_subset$participant_id, uk_basic_companies_data$CompanyNumber)]$CompanyName ]
|
||
#holders_subset[, participant_country := uk_basic_companies_data[ match(holders_subset$participant_id, uk_basic_companies_data$CompanyNumber)]$CountryOfOrigin ]
|
||
|
||
# Proper name handling for individuals: first name-surname
|
||
holders_subset[grepl("$", participant_id, fixed = T) & kind != "individual", participant_name := as.data.table(stri_split_fixed(participant_id, "$", n = 3, simplify = T)[,1])]
|
||
holders_subset[kind == "individual", participant_name := paste0(stri_split_fixed(participant_id, "$", n = 4, simplify = T)[, 1], " ", stri_split_fixed(participant_id, "$", n = 4, simplify = T)[, 3])]
|
||
|
||
#holders_subset[, company_country := gsub("United Kingdom", "UK", company_country) ]
|
||
#holders_subset[, participant_country := gsub("United Kingdom", "UK", participant_country) ]
|
||
|
||
# Are the names unique?
|
||
#uniqueN(holders_subset$company_name)
|
||
#uniqueN(holders_subset$participant_name)
|
||
|
||
# Add the graph types of organizations
|
||
holders_subset[, company_type := graph_membership[ match(holders_subset$company_number, graph_membership$participant_id) ]$type ]
|
||
holders_subset[, participant_type := graph_membership[ match(holders_subset$participant_id, graph_membership$participant_id) ]$type ]
|
||
holders_subset[, share_range := NA_character_ ]
|
||
holders_subset[ share == 87.5, share_range := "75-100%"]
|
||
holders_subset[ share == 62.5, share_range := "50-75%"]
|
||
holders_subset[ share == 37.5, share_range := "25-50%"]
|
||
|
||
## Total number of holdings
|
||
holdings_count <- holders_subset[, list(count_holdings_total = .N), by = "participant_name"]
|
||
|
||
## Total number of holders
|
||
holders_count <- holders_subset[, list(count_holders_total = .N), by = "company_name"]
|
||
|
||
# To igraph object
|
||
## Create an object with vertex metadata
|
||
vertex_metadata <- unique(rbind(unique(data.table( name = holders_subset$company_name, type = holders_subset$company_type)),
|
||
data.table( name = holders_subset$participant_name, type = holders_subset$participant_type)
|
||
), by = "name")
|
||
vertex_metadata <- merge(vertex_metadata, holdings_count, by.x = "name", by.y = "participant_name", all.x = T, all.y = F, sort = F)
|
||
vertex_metadata <- merge(vertex_metadata, holders_count, by.x = "name", by.y = "company_name", all.x = T, all.y = F, sort = F)
|
||
vertex_metadata[is.na(count_holdings_total), count_holdings_total := 0]
|
||
vertex_metadata[is.na(count_holders_total), count_holders_total := 0]
|
||
# NA to mock 0
|
||
vertex_metadata[count_holdings_total == 0, count_holdings_total := 1]
|
||
# Remove intermediaries with no holders
|
||
vertex_metadata <- vertex_metadata[ !(count_holders_total == 0 & type == "I") ]
|
||
|
||
## Convert to igraph
|
||
holders_example_graph <- graph_from_data_frame(holders_subset[participant_name %in% vertex_metadata$name & company_name %in% vertex_metadata$name, c("participant_name", "company_name", "share", "share_range")], directed = T, vertices = vertex_metadata)
|
||
|
||
# Line-wrap the name
|
||
V(holders_example_graph)$name_label <- str_wrap(V(holders_example_graph)$name, 15)
|
||
|
||
# Graph to ggnetwork
|
||
holders_example_net <- ggnetwork(holders_example_graph, layout = with_kk(), arrow.gap = 0.01)
|
||
|
||
# Empty name for certain types if network is large
|
||
if(nrow(holders_example_net) > 150) {
|
||
|
||
holders_example_net[!(holders_example_net$type %in% c("SH", "C") | holders_example_net$name %in% c("SPECSAVERS UK HOLDINGS LIMITED", "HANSON PACKED PRODUCTS LIMITED", "HOUSERATE LIMITED", "HANSON LIMITED", "HEIDELBERGCEMENT UK HOLDING LIMITED") ), "name_label"] <- ""
|
||
|
||
}
|
||
|
||
# Plot the graph
|
||
holders_example_plot <- ggplot(holders_example_net, aes(x = x, y = y, xend = xend, yend = yend)) +
|
||
geom_edges(color = "#ebecf0", arrow = arrow(angle = 45, length = unit(5, "pt"), type = "closed"), show.legend = F) +
|
||
geom_nodes(aes(color = type, size = log(count_holdings_total)), show.legend = T) +
|
||
{ if( org_interest %in% c("03782947", "PHILIP$AULD$MACTAGGART$1956$2", "BAJLINDER$KAUR$BOPARAN$1968$1")) geom_edgetext(aes(label = share_range), color = "grey25", size = 1.5) } +
|
||
geom_nodetext_repel(aes(label = name_label, color = type), fontface = "bold", size = 2, show.legend = F, point.padding = 5, max.overlaps = 300) +
|
||
#{ if(nrow(holders_example_net) > 150) geom_nodetext_repel(aes(color = type), label = "", fontface = "bold", size = 2, show.legend = F, point.padding = 5) } +
|
||
#{ if(nrow(holders_example_net) <= 150) geom_nodetext_repel(aes(label = name, color = type), fontface = "bold", size = 2, show.legend = F, point.padding = 5) } +
|
||
scale_size(guide = "none") +
|
||
scale_color_manual(name = "node type", labels = c("super-holder", "super-target", "intermediary", "core"), values = c("SH" = "#66c2a5", "ST" = "#8da0cb", "I" = "grey", "C" = "#fc8d62")) +
|
||
guides(color = guide_legend(override.aes = list(size = 5))) +
|
||
theme_blank() + theme(text = element_text(family = "Open Sans"), legend.position = "bottom", plot.margin = unit(c(0, 0, 0, 0), "cm"))
|
||
|
||
# Export to an image
|
||
graph_file_name <- gsub("[[:punct:]]", "", stri_split_fixed(org_interest, "$", simplify = T)[1,1])
|
||
|
||
if( org_interest %in% c("SEAN$MICHAEL$DINNEN$1968$3", "SPECSAVERS OPTICAL SUPERSTORES LTD$UNITED KINGDOM$1721624") ) {
|
||
|
||
ggsave(paste0("output/alphaicon_paper/network_examples/uk_example ", graph_file_name, ".pdf"), height = 20, width = 20, scale = 1.2, device = cairo_pdf)
|
||
|
||
} else {
|
||
|
||
ggsave(paste0("output/alphaicon_paper/network_examples/uk_example ", graph_file_name, ".pdf"), height = 10, width = 10, device = cairo_pdf)
|
||
|
||
}
|
||
|
||
message(graph_file_name)
|
||
|
||
}
|
||
|
||
########
|
||
# Plot the graph with the core components only
|
||
core <- psc[ participant_id %in% unique(graph_membership[type == "C"]$participant_id) & company_number %in% unique(graph_membership[type == "C"]$participant_id) ]
|
||
core <- core[ !is.na(equity_share)]
|
||
setnames(core, "equity_share", "share")
|
||
|
||
# Add company and participant names and countries of origin
|
||
core[, company_name := uk_basic_companies_data[ match(core$company_number, uk_basic_companies_data$CompanyNumber)]$CompanyName ]
|
||
core[, participant_name := uk_basic_companies_data[ match(core$participant_id, uk_basic_companies_data$CompanyNumber)]$CompanyName ]
|
||
|
||
# Proper name handling for individuals: first name-surname
|
||
core[grepl("$", participant_id, fixed = T) & kind != "individual", participant_name := as.data.table(stri_split_fixed(participant_id, "$", n = 3, simplify = T)[,1])]
|
||
core[kind == "individual", participant_name := paste0(stri_split_fixed(participant_id, "$", n = 4, simplify = T)[, 1], " ", stri_split_fixed(participant_id, "$", n = 4, simplify = T)[, 3])]
|
||
|
||
# Add the graph types of organizations
|
||
core[, company_type := graph_membership[ match(core$company_number, graph_membership$participant_id) ]$type ]
|
||
core[, participant_type := graph_membership[ match(core$participant_id, graph_membership$participant_id) ]$type ]
|
||
core[, share_range := NA_character_ ]
|
||
core[ share == 87.5, share_range := "75-100%"]
|
||
core[ share == 62.5, share_range := "50-75%"]
|
||
core[ share == 37.5, share_range := "25-50%"]
|
||
|
||
# Add the total number of companies where core participants
|
||
## Total number of holdings
|
||
core_holdings <- holders[participant_id %in% graph_membership[type == "C"]$participant_id, list(count_holdings_total = .N), by = "participant_id"]
|
||
## Holdings in core
|
||
core_holdings <- merge(core_holdings, holders[participant_id %in% graph_membership[type == "C"]$participant_id & company_number %in% graph_membership[type == "C"]$participant_id, list(count_holdings_core = .N), by = "participant_id"], by = "participant_id", all = T)
|
||
|
||
# To igraph object
|
||
## Create an object with vertex metadata
|
||
vertex_metadata <- unique(rbind(unique(data.table(name = core$company_name, type = core$company_type, id = core$company_number)),
|
||
data.table( name = core$participant_name, type = core$participant_type, id = core$participant_id)
|
||
), by = "name")
|
||
|
||
# Add the counts of holdings
|
||
vertex_metadata <- merge(vertex_metadata, core_holdings, by.x = "id", by.y = "participant_id", all = T)
|
||
vertex_metadata[, id := NULL]
|
||
|
||
# Keep only non-trivial components
|
||
vertex_metadata <- vertex_metadata[ count_holdings_core > 2 ]
|
||
|
||
## Convert to igraph
|
||
core_graph <- graph_from_data_frame(core[ participant_name %in% vertex_metadata$name | company_name %in% vertex_metadata$name, c("participant_name", "company_name", "share", "share_range")], directed = T, vertices = vertex_metadata)
|
||
|
||
# Line-wrap the name
|
||
V(core_graph)$name_label <- str_wrap(V(core_graph)$name, 15)
|
||
|
||
# Graph to ggnetwork
|
||
core_net <- ggnetwork(core_graph, layout = with_fr(), arrow.gap = 0.01)
|
||
|
||
# Plot the graph
|
||
core_plot <- ggplot(core_net, aes(x = x, y = y, xend = xend, yend = yend)) +
|
||
geom_edges(color = "grey", arrow = arrow(angle = 45, length = unit(3, "pt"), type = "closed"), show.legend = F) +
|
||
geom_nodes(aes(color = type, size = count_holdings_total), show.legend = T) +
|
||
geom_edgetext(aes(label = share_range), color = "grey25", size = 1) +
|
||
geom_nodetext_repel(aes(label = name_label, color = type), fontface = "bold", size = 2, show.legend = F, point.padding = 5) +
|
||
scale_size(guide = "none") +
|
||
scale_color_manual(name = "node type", labels = c("super-holder", "super-target", "intermediary", "core"), values = c("SH" = "#66c2a5", "ST" = "#8da0cb", "I" = "grey", "C" = "#fc8d62")) +
|
||
guides(color = guide_legend(override.aes = list(size = 5))) +
|
||
theme_blank() + theme(text = element_text(family = "Open Sans"), legend.position = "none", plot.margin = unit(c(0, 0, 0, 0), "cm"))
|
||
|
||
ggsave(paste0("output/network_examples/core_plot.pdf"), height = 10, width = 10, scale = 1.1, device = cairo_pdf)
|