Plotting a hierarchical tree
This script can be used to plot a hierarchical tree like the one
presented in Yao et al., 2023 Figure 1. The input can be a data frame
with rows representing the leaf-level and columns represent annotations.
This data frame needs to be restructured to a parent-child (hierarchical
network data frame), which is what the ‘as.hierDF’ function does.
require(dplyr)
require(ggplot2)
require(ggnewscale)
require(ggraph)
require(igraph)
require(tidyverse)
#theme_set(theme_void())
require(ggrepel)
Let’s start by loading some dummy hierarchical data. This data is a
subset of the data from the Allen
Brain Cell Atlas.
A hierarchic structure is basically a set of nodes, with edges
linking nodes. Let’s create an edge list for plotting using the
package.We’ll do this for a subset of the data.
cl.df <- cl.df[cl.df$class_id %in% c(6,7),]
hierDF <- as.hierDF(cl.df, levels = c("class_id_label", "subclass_id_label","supertype_id_label"),rootname="wmb")
## [1] 1
## [1] 2
## [1] 3
# Create a graph object
graph <- graph_from_data_frame( hierDF)
dummy <- ggraph(graph, layout = 'dendrogram', circular = FALSE) +
geom_edge_diagonal()
dend_leaves <- dummy[["data"]] %>% filter(leaf == TRUE)
n_leaves <- nrow(dend_leaves)
dend_leaves <- dend_leaves %>%
left_join(st.df[,c("supertype_id_label", "supertype_color","supertype_id")], by=c("name"="supertype_id_label"))
subclass.df <- dummy[["data"]] %>%
filter(name %in% sc.df$subclass_id_label) %>%
left_join(sc.df[,c("subclass_id_label","subclass_id", "subclass_color")], by=c("name"="subclass_id_label"))
subclass.df$subclass_id <- gsub( " .*$", "", subclass.df$name)
class.df <- dummy[["data"]] %>%
filter(name %in% c.df$class_id_label) %>%
left_join(c.df[,c("class_id_label", "class_color")], by=c("name"="class_id_label"))
Next use ggraph to plot the ‘dendrogram’ and add additional layers of
labeling using standard ggplot.
flat_plot = ggraph(graph, layout = 'dendrogram', circular = FALSE) +
geom_edge_diagonal(width = 0.25,
color="grey50") +
#supertype
geom_point(data = dend_leaves,
aes(x=x, y=y, color=supertype_color),
cex=1,
shape=19) +
scale_color_identity(guide = "none") +
# subclass
new_scale_color() +
geom_point(data=subclass.df,
aes(x=x, y=y, color= subclass_color),
cex=2,
shape=19)+
geom_text(data=subclass.df,
aes(x=x, y=y, label= subclass_id),
size=3,hjust=0, vjust=0.5,
angle=90)+ #, direction='y')+
scale_color_identity(guide = "none") +
# class
new_scale_color() +
geom_point(data=class.df,
aes(x=x, y=y, color= class_color),
cex=2,
shape=19)+
geom_text_repel(data=class.df,
aes(x=x, y=y, label= name),
size=3,hjust=1, vjust=0.5,
direction='y')+
scale_color_identity(guide = "none") +
# add other levels if needed
geom_text(data=dend_leaves,
aes(x = x,
y = y-0.1,
label = name),
angle = 90,
hjust = 1.0,
vjust = 0.5,
size = 3,
lineheight=0.1) +
scale_x_continuous(limits = c(-1,n_leaves + 1),
expand=c(0,0)) +
coord_cartesian(clip = 'off') +
theme_void() +
theme(plot.margin = margin(t = 0, r = 0, b = 120, l = 0,))
LS0tCnRpdGxlOiAiSGllclRyZWUiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICAgIGNvZGVfZm9sZGluZzogaGlkZQpkYXRlOiAiMjAyNC0wMi0wNiIKLS0tClwKXAoKIyMgUGxvdHRpbmcgYSBoaWVyYXJjaGljYWwgdHJlZQpcCgpUaGlzIHNjcmlwdCBjYW4gYmUgdXNlZCB0byBwbG90IGEgaGllcmFyY2hpY2FsIHRyZWUgbGlrZSB0aGUgb25lIHByZXNlbnRlZCBpbiBZYW8gZXQgYWwuLCAyMDIzIEZpZ3VyZSAxLgpUaGUgaW5wdXQgY2FuIGJlIGEgZGF0YSBmcmFtZSB3aXRoIHJvd3MgcmVwcmVzZW50aW5nIHRoZSBsZWFmLWxldmVsIGFuZCBjb2x1bW5zIHJlcHJlc2VudCBhbm5vdGF0aW9ucy4gVGhpcyBkYXRhIGZyYW1lIG5lZWRzIHRvIGJlIHJlc3RydWN0dXJlZCB0byBhIHBhcmVudC1jaGlsZCAoaGllcmFyY2hpY2FsIG5ldHdvcmsgZGF0YSBmcmFtZSksIHdoaWNoIGlzIHdoYXQgdGhlICdhcy5oaWVyREYnIGZ1bmN0aW9uIGRvZXMuIAoKYGBge3Iga2xpcHB5LCBlY2hvPUZBTFNFLCBpbmNsdWRlPVRSVUV9CmtsaXBweTo6a2xpcHB5KCkKYGBgCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0gCmtuaXRyOjpvcHRzX2NodW5rJHNldCh3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSkgCmBgYAoKYGBge3IgbG9hZGluZyBsaWJzLCBlY2hvPVR9CgpyZXF1aXJlKGRwbHlyKQpyZXF1aXJlKGdncGxvdDIpCnJlcXVpcmUoZ2duZXdzY2FsZSkKcmVxdWlyZShnZ3JhcGgpCnJlcXVpcmUoaWdyYXBoKQpyZXF1aXJlKHRpZHl2ZXJzZSkKI3RoZW1lX3NldCh0aGVtZV92b2lkKCkpCnJlcXVpcmUoZ2dyZXBlbCkKCmBgYAoKTGV0J3Mgc3RhcnQgYnkgbG9hZGluZyBzb21lIGR1bW15IGhpZXJhcmNoaWNhbCBkYXRhLiBUaGlzIGRhdGEgaXMgYSBzdWJzZXQgb2YgdGhlIGRhdGEgZnJvbQp0aGUgW0FsbGVuIEJyYWluIENlbGwgQXRsYXNdKGh0dHBzOi8vYWxsZW5pbnN0aXR1dGUuZ2l0aHViLmlvL2FiY19hdGxhc19hY2Nlc3MvaW50cm8uaHRtbCkuCgpgYGB7ciwgZWNobz1GLCBldmFsPVR9ClVSTCA9ICdodHRwczovL2FsbGVuLWJyYWluLWNlbGwtYXRsYXMuczMtdXMtd2VzdC0yLmFtYXpvbmF3cy5jb20vbWV0YWRhdGEvV01CLXRheG9ub215LzIwMjMxMjE1L2NsLmRmX0NDTjIwMjMwNzIyMC54bHN4JwpkYXRhID0gcmlvOjppbXBvcnRfbGlzdChVUkwpCgpjb2xvcnMgPC0gcmlvOjppbXBvcnQoImh0dHBzOi8vYWxsZW4tYnJhaW4tY2VsbC1hdGxhcy5zMy11cy13ZXN0LTIuYW1hem9uYXdzLmNvbS9tZXRhZGF0YS9XTUItdGF4b25vbXkvMjAyMzEyMTUvdmlld3MvY2x1c3Rlcl90b19jbHVzdGVyX2Fubm90YXRpb25fbWVtYmVyc2hpcF9jb2xvci5jc3YiKQpgYGAKCmBgYHtyLCBlY2hvPUYsIGV2YWw9VH0KCmNsLmRmIDwtIGRhdGEkY2x1c3Rlcl9hbm5vdGF0aW9uCmNsLmRmIDwtIGNsLmRmW2NsLmRmJGNsYXNzX2xhYmVsICE9ICJMUSIsXQoKIyBhZGQgY29sb3JzIHRvIGNsdXN0ZXIgZGF0YSBmcmFtZQpjb2xvcnMkY2x1c3Rlcl9hbGlhcyA8LSBhcy5jaGFyYWN0ZXIoYXMuaW50ZWdlcihjb2xvcnMkY2x1c3Rlcl9hbGlhcykpCmNsLmRmIDwtIGNsLmRmICU+JSBsZWZ0X2pvaW4oY29sb3JzLCBieT1jKCJjbCI9ImNsdXN0ZXJfYWxpYXMiKSkKCnNlbGVjdC5jb2x1bW5zIDwtIGNvbG5hbWVzKGNsLmRmKVtncmVwKCJec3VwZXJ0eXBlIiwgY29sbmFtZXMoY2wuZGYpKV0Kc3QuZGYgPC0gY2wuZGYgJT4lIGdyb3VwX2J5X2F0KHNlbGVjdC5jb2x1bW5zKSAlPiUgc3VtbWFyaXNlKG49bigpKQoKc2VsZWN0LmNvbHVtbnMgPC0gY29sbmFtZXMoY2wuZGYpW2dyZXAoIl5zdWJjbGFzcyIsIGNvbG5hbWVzKGNsLmRmKSldCnNjLmRmIDwtIGNsLmRmICU+JSBncm91cF9ieV9hdChzZWxlY3QuY29sdW1ucykgJT4lIHN1bW1hcmlzZShuPW4oKSkKCnNlbGVjdC5jb2x1bW5zIDwtIGNvbG5hbWVzKGNsLmRmKVtncmVwKCJeY2xhc3MiLCBjb2xuYW1lcyhjbC5kZikpXQpjLmRmIDwtIGNsLmRmICU+JSBncm91cF9ieV9hdChzZWxlY3QuY29sdW1ucykgJT4lIHN1bW1hcmlzZShuPW4oKSkKCmBgYAoKQSBoaWVyYXJjaGljIHN0cnVjdHVyZSBpcyBiYXNpY2FsbHkgYSBzZXQgb2Ygbm9kZXMsIHdpdGggZWRnZXMgbGlua2luZyBub2Rlcy4gTGV0J3MgY3JlYXRlIGFuIGVkZ2UgbGlzdCBmb3IgcGxvdHRpbmcgdXNpbmcgdGhlIDxpZ3JhcGg+IHBhY2thZ2UuV2UnbGwgZG8gdGhpcyBmb3IgYSBzdWJzZXQgb2YgdGhlIGRhdGEuCgpgYGB7ciwgZWNobz1GLCBldmFsPVR9CmRldnRvb2xzOjpzb3VyY2VfZ2lzdCgiaHR0cHM6Ly9naXN0LmdpdGh1Yi5jb20vY3ZhbnZlbHQvZTBlNDgzMzZmMDFjNDlhYTYxNmFhYWI0YWJmOTk3ZDgiKQpgYGAKCgoKYGBge3IgY3JlYXRlIHRyZWUsIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD00LCBlY2hvPVR9CmNsLmRmIDwtIGNsLmRmW2NsLmRmJGNsYXNzX2lkICVpbiUgYyg2LDcpLF0KCmhpZXJERiA8LSBhcy5oaWVyREYoY2wuZGYsIGxldmVscyA9IGMoImNsYXNzX2lkX2xhYmVsIiwgInN1YmNsYXNzX2lkX2xhYmVsIiwic3VwZXJ0eXBlX2lkX2xhYmVsIikscm9vdG5hbWU9IndtYiIpCiMgQ3JlYXRlIGEgZ3JhcGggb2JqZWN0CmdyYXBoIDwtIGdyYXBoX2Zyb21fZGF0YV9mcmFtZSggaGllckRGKQoKCmR1bW15IDwtIGdncmFwaChncmFwaCwgbGF5b3V0ID0gJ2RlbmRyb2dyYW0nLCBjaXJjdWxhciA9IEZBTFNFKSArIAogIGdlb21fZWRnZV9kaWFnb25hbCgpCgpkZW5kX2xlYXZlcyA8LSBkdW1teVtbImRhdGEiXV0gJT4lIGZpbHRlcihsZWFmID09IFRSVUUpCgpuX2xlYXZlcyA8LSBucm93KGRlbmRfbGVhdmVzKQoKZGVuZF9sZWF2ZXMgPC0gZGVuZF9sZWF2ZXMgJT4lIAogIGxlZnRfam9pbihzdC5kZlssYygic3VwZXJ0eXBlX2lkX2xhYmVsIiwgInN1cGVydHlwZV9jb2xvciIsInN1cGVydHlwZV9pZCIpXSwgYnk9YygibmFtZSI9InN1cGVydHlwZV9pZF9sYWJlbCIpKQoKc3ViY2xhc3MuZGYgPC0gZHVtbXlbWyJkYXRhIl1dICU+JSAKICBmaWx0ZXIobmFtZSAlaW4lIHNjLmRmJHN1YmNsYXNzX2lkX2xhYmVsKSAlPiUgCiAgbGVmdF9qb2luKHNjLmRmWyxjKCJzdWJjbGFzc19pZF9sYWJlbCIsInN1YmNsYXNzX2lkIiwgInN1YmNsYXNzX2NvbG9yIildLCBieT1jKCJuYW1lIj0ic3ViY2xhc3NfaWRfbGFiZWwiKSkKc3ViY2xhc3MuZGYkc3ViY2xhc3NfaWQgPC0gZ3N1YiggIiAuKiQiLCAiIiwgc3ViY2xhc3MuZGYkbmFtZSkgCgpjbGFzcy5kZiA8LSAgZHVtbXlbWyJkYXRhIl1dICU+JSAKICBmaWx0ZXIobmFtZSAlaW4lIGMuZGYkY2xhc3NfaWRfbGFiZWwpICU+JSAKICBsZWZ0X2pvaW4oYy5kZlssYygiY2xhc3NfaWRfbGFiZWwiLCAiY2xhc3NfY29sb3IiKV0sIGJ5PWMoIm5hbWUiPSJjbGFzc19pZF9sYWJlbCIpKQoKYGBgCk5leHQgdXNlIGdncmFwaCB0byBwbG90IHRoZSAnZGVuZHJvZ3JhbScgYW5kIGFkZCBhZGRpdGlvbmFsIGxheWVycyBvZiBsYWJlbGluZyB1c2luZyBzdGFuZGFyZCBnZ3Bsb3QuCgpgYGB7ciBjcmVhdGUgcGxvdCwgZmlnLndpZHRoPTExLCBmaWcuaGVpZ2h0PTIsIGV2YWw9VCwgZWNobz1UfQpmbGF0X3Bsb3QgPSBnZ3JhcGgoZ3JhcGgsIGxheW91dCA9ICdkZW5kcm9ncmFtJywgY2lyY3VsYXIgPSBGQUxTRSkgKyAKICBnZW9tX2VkZ2VfZGlhZ29uYWwod2lkdGggPSAwLjI1LAogICAgICAgICAgICAgICAgICAgICBjb2xvcj0iZ3JleTUwIikgKwogICNzdXBlcnR5cGUKICBnZW9tX3BvaW50KGRhdGEgPSBkZW5kX2xlYXZlcywgCiAgICAgICAgICAgICBhZXMoeD14LCB5PXksIGNvbG9yPXN1cGVydHlwZV9jb2xvciksCiAgICAgICAgICAgICBjZXg9MSwKICAgICAgICAgICAgIHNoYXBlPTE5KSArCiAgc2NhbGVfY29sb3JfaWRlbnRpdHkoZ3VpZGUgPSAibm9uZSIpICsKICAjIHN1YmNsYXNzCiAgbmV3X3NjYWxlX2NvbG9yKCkgKwogIGdlb21fcG9pbnQoZGF0YT1zdWJjbGFzcy5kZiwgCiAgICAgICAgICAgICBhZXMoeD14LCB5PXksIGNvbG9yPSBzdWJjbGFzc19jb2xvciksCiAgICAgICAgICAgICBjZXg9MiwKICAgICAgICAgICAgIHNoYXBlPTE5KSsKICBnZW9tX3RleHQoZGF0YT1zdWJjbGFzcy5kZiwgCiAgICAgICAgICAgICBhZXMoeD14LCB5PXksIGxhYmVsPSBzdWJjbGFzc19pZCksCiAgICAgICAgICAgICBzaXplPTMsaGp1c3Q9MCwgdmp1c3Q9MC41LAogICAgICAgICAgICAgYW5nbGU9OTApKyAjLCAgICAgICAgICAgICBkaXJlY3Rpb249J3knKSsKICAgIHNjYWxlX2NvbG9yX2lkZW50aXR5KGd1aWRlID0gIm5vbmUiKSArCiAgIyBjbGFzcwogIG5ld19zY2FsZV9jb2xvcigpICsKICBnZW9tX3BvaW50KGRhdGE9Y2xhc3MuZGYsIAogICAgICAgICAgICAgYWVzKHg9eCwgeT15LCBjb2xvcj0gY2xhc3NfY29sb3IpLAogICAgICAgICAgICAgY2V4PTIsCiAgICAgICAgICAgICBzaGFwZT0xOSkrCiAgZ2VvbV90ZXh0X3JlcGVsKGRhdGE9Y2xhc3MuZGYsIAogICAgICAgICAgICAgYWVzKHg9eCwgeT15LCBsYWJlbD0gbmFtZSksCiAgICAgICAgICAgICBzaXplPTMsaGp1c3Q9MSwgdmp1c3Q9MC41LAogICAgICAgICAgICAgZGlyZWN0aW9uPSd5JykrCiAgc2NhbGVfY29sb3JfaWRlbnRpdHkoZ3VpZGUgPSAibm9uZSIpICsKICAjIGFkZCBvdGhlciBsZXZlbHMgaWYgbmVlZGVkCiAgZ2VvbV90ZXh0KGRhdGE9ZGVuZF9sZWF2ZXMsCiAgICAgICAgICAgIGFlcyh4ID0geCwKICAgICAgICAgICAgICAgIHkgPSB5LTAuMSwKICAgICAgICAgICAgICAgIGxhYmVsID0gbmFtZSksCiAgICAgICAgICAgIGFuZ2xlID0gOTAsCiAgICAgICAgICAgIGhqdXN0ID0gMS4wLAogICAgICAgICAgICB2anVzdCA9IDAuNSwKICAgICAgICAgICAgc2l6ZSA9IDMsCiAgICAgICAgICAgIGxpbmVoZWlnaHQ9MC4xKSArCiAgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cyA9IGMoLTEsbl9sZWF2ZXMgKyAxKSwKICAgICAgICAgICAgICAgICAgICAgZXhwYW5kPWMoMCwwKSkgKyAgCiAgY29vcmRfY2FydGVzaWFuKGNsaXAgPSAnb2ZmJykgICsKICB0aGVtZV92b2lkKCkgICsKICB0aGVtZShwbG90Lm1hcmdpbiA9IG1hcmdpbih0ID0gMCwgciA9IDAsIGIgPSAxMjAsIGwgPSAwLCkpCiAgCgoKYGBgCgpgYGB7ciBwbG90IHRyZWUsIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD00LGVjaG89Rn0KZmxhdF9wbG90CmBgYAoKCg==