Plotting a sunburst diagram


This script can be used to plot a sunburst diagram to represent a hierarchy. The input can be a data frame with rows representing the leaf-level and columns represent annotations. This data frame needs to be restructured to a parent-child (hierarchical network data frame), which is what the ‘as.hierDF’ function does.

require(dplyr)
require(plotly)

Let’s start by loading some dummy hierarchical data. This data is a subset of the data from the Allen Brain Cell Atlas.

URL = 'https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/metadata/WMB-taxonomy/20231215/cl.df_CCN202307220.xlsx'
data = rio::import_list(URL)

colors <- rio::import("https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/metadata/WMB-taxonomy/20231215/views/cluster_to_cluster_annotation_membership_color.csv")
cl.df <- data$cluster_annotation
cl.df <- cl.df[cl.df$class_label != "LQ",]

cl.df <- cl.df %>% mutate(cluster_size = c(multiome.size + v2.size + v3.size))

# add colors to cluster data frame
colors$cluster_alias <- as.character(as.integer(colors$cluster_alias))
cl.df <- cl.df %>% left_join(colors, by=c("cl"="cluster_alias"))

select.columns <- colnames(cl.df)[grep("^supertype", colnames(cl.df))]
st.df <- cl.df %>% group_by_at(select.columns) %>% summarise(n=n())

select.columns <- colnames(cl.df)[grep("^subclass", colnames(cl.df))]
sc.df <- cl.df %>% group_by_at(select.columns) %>% summarise(n=n())

select.columns <- colnames(cl.df)[grep("^class", colnames(cl.df))]
c.df <- cl.df %>% group_by_at(select.columns) %>% summarise(n=n())
devtools::source_gist("https://gist.github.com/cvanvelt/ef29e1581b30b9758ec1bba1b8322619")
cl.df <- cl.df[cl.df$class_id %in% c(1:7),]
sunburstDF <- as.sunburstDF(cl.df, 
                           levels=c("class","subclass","supertype"),
                            valueCol = "cluster_size", 
                            rootname="WMB")
## [1] 1
## [1] 2
## [1] 3
p <- plot_ly() %>%
      add_trace(ids = sunburstDF$ids,
                labels = sunburstDF$labels,
                parents =sunburstDF$parent,
                values = sunburstDF$values,
                type = 'sunburst',
                sort=FALSE,
                marker = list(colors = sunburstDF$color),
                domain = list(column = 1),
                branchvalues = 'total'
                )%>%
      layout(grid = list(columns =1, rows = 1),
              margin = list(l = 0, r = 0, b = 0, t = 0)
      )
p
LS0tCnRpdGxlOiAiU3VuYnVyc3QgaGllcmFyY2h5IgphdXRob3I6ICJDaW5keSB2YW4gVmVsdGhvdmVuIgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUKZGF0ZTogIjIwMjQtMDItMDYiCi0tLQoKXApcCgojIyBQbG90dGluZyBhIHN1bmJ1cnN0IGRpYWdyYW0KClwKClRoaXMgc2NyaXB0IGNhbiBiZSB1c2VkIHRvIHBsb3QgYSBzdW5idXJzdCBkaWFncmFtIHRvIHJlcHJlc2VudCBhIGhpZXJhcmNoeS4KVGhlIGlucHV0IGNhbiBiZSBhIGRhdGEgZnJhbWUgd2l0aCByb3dzIHJlcHJlc2VudGluZyB0aGUgbGVhZi1sZXZlbCBhbmQgY29sdW1ucyByZXByZXNlbnQgYW5ub3RhdGlvbnMuIFRoaXMgZGF0YSBmcmFtZSBuZWVkcyB0byBiZSByZXN0cnVjdHVyZWQgdG8gYSBwYXJlbnQtY2hpbGQgKGhpZXJhcmNoaWNhbCBuZXR3b3JrIGRhdGEgZnJhbWUpLCB3aGljaCBpcyB3aGF0IHRoZSAnYXMuaGllckRGJyBmdW5jdGlvbiBkb2VzLiAKCmBgYHtyIGtsaXBweSwgZWNobz1GQUxTRSwgaW5jbHVkZT1UUlVFfQprbGlwcHk6OmtsaXBweSgpCmBgYAoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9IAprbml0cjo6b3B0c19jaHVuayRzZXQod2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UpIApgYGAKCmBgYHtyIGxvYWRpbmcgbGlicywgZWNobz1UfQoKcmVxdWlyZShkcGx5cikKcmVxdWlyZShwbG90bHkpCgpgYGAKCkxldCdzIHN0YXJ0IGJ5IGxvYWRpbmcgc29tZSBkdW1teSBoaWVyYXJjaGljYWwgZGF0YS4gVGhpcyBkYXRhIGlzIGEgc3Vic2V0IG9mIHRoZSBkYXRhIGZyb20KdGhlIFtBbGxlbiBCcmFpbiBDZWxsIEF0bGFzXShodHRwczovL2FsbGVuaW5zdGl0dXRlLmdpdGh1Yi5pby9hYmNfYXRsYXNfYWNjZXNzL2ludHJvLmh0bWwpLgoKYGBge3IsIGVjaG89VCwgZXZhbD1UfQpVUkwgPSAnaHR0cHM6Ly9hbGxlbi1icmFpbi1jZWxsLWF0bGFzLnMzLXVzLXdlc3QtMi5hbWF6b25hd3MuY29tL21ldGFkYXRhL1dNQi10YXhvbm9teS8yMDIzMTIxNS9jbC5kZl9DQ04yMDIzMDcyMjAueGxzeCcKZGF0YSA9IHJpbzo6aW1wb3J0X2xpc3QoVVJMKQoKY29sb3JzIDwtIHJpbzo6aW1wb3J0KCJodHRwczovL2FsbGVuLWJyYWluLWNlbGwtYXRsYXMuczMtdXMtd2VzdC0yLmFtYXpvbmF3cy5jb20vbWV0YWRhdGEvV01CLXRheG9ub215LzIwMjMxMjE1L3ZpZXdzL2NsdXN0ZXJfdG9fY2x1c3Rlcl9hbm5vdGF0aW9uX21lbWJlcnNoaXBfY29sb3IuY3N2IikKYGBgCgpgYGB7ciwgZWNobz1ULCBldmFsPVR9CgpjbC5kZiA8LSBkYXRhJGNsdXN0ZXJfYW5ub3RhdGlvbgpjbC5kZiA8LSBjbC5kZltjbC5kZiRjbGFzc19sYWJlbCAhPSAiTFEiLF0KCmNsLmRmIDwtIGNsLmRmICU+JSBtdXRhdGUoY2x1c3Rlcl9zaXplID0gYyhtdWx0aW9tZS5zaXplICsgdjIuc2l6ZSArIHYzLnNpemUpKQoKIyBhZGQgY29sb3JzIHRvIGNsdXN0ZXIgZGF0YSBmcmFtZQpjb2xvcnMkY2x1c3Rlcl9hbGlhcyA8LSBhcy5jaGFyYWN0ZXIoYXMuaW50ZWdlcihjb2xvcnMkY2x1c3Rlcl9hbGlhcykpCmNsLmRmIDwtIGNsLmRmICU+JSBsZWZ0X2pvaW4oY29sb3JzLCBieT1jKCJjbCI9ImNsdXN0ZXJfYWxpYXMiKSkKCnNlbGVjdC5jb2x1bW5zIDwtIGNvbG5hbWVzKGNsLmRmKVtncmVwKCJec3VwZXJ0eXBlIiwgY29sbmFtZXMoY2wuZGYpKV0Kc3QuZGYgPC0gY2wuZGYgJT4lIGdyb3VwX2J5X2F0KHNlbGVjdC5jb2x1bW5zKSAlPiUgc3VtbWFyaXNlKG49bigpKQoKc2VsZWN0LmNvbHVtbnMgPC0gY29sbmFtZXMoY2wuZGYpW2dyZXAoIl5zdWJjbGFzcyIsIGNvbG5hbWVzKGNsLmRmKSldCnNjLmRmIDwtIGNsLmRmICU+JSBncm91cF9ieV9hdChzZWxlY3QuY29sdW1ucykgJT4lIHN1bW1hcmlzZShuPW4oKSkKCnNlbGVjdC5jb2x1bW5zIDwtIGNvbG5hbWVzKGNsLmRmKVtncmVwKCJeY2xhc3MiLCBjb2xuYW1lcyhjbC5kZikpXQpjLmRmIDwtIGNsLmRmICU+JSBncm91cF9ieV9hdChzZWxlY3QuY29sdW1ucykgJT4lIHN1bW1hcmlzZShuPW4oKSkKCmBgYAoKCmBgYHtyLCBlY2hvPVQsIGV2YWw9VH0KZGV2dG9vbHM6OnNvdXJjZV9naXN0KCJodHRwczovL2dpc3QuZ2l0aHViLmNvbS9jdmFudmVsdC9lZjI5ZTE1ODFiMzBiOTc1OGVjMWJiYTFiODMyMjYxOSIpCmBgYAoKCgpgYGB7ciBjcmVhdGUgc3VuYnVyc3QuZGYsIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD00LCBlY2hvPVR9CmNsLmRmIDwtIGNsLmRmW2NsLmRmJGNsYXNzX2lkICVpbiUgYygxOjcpLF0Kc3VuYnVyc3RERiA8LSBhcy5zdW5idXJzdERGKGNsLmRmLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgbGV2ZWxzPWMoImNsYXNzIiwic3ViY2xhc3MiLCJzdXBlcnR5cGUiKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHZhbHVlQ29sID0gImNsdXN0ZXJfc2l6ZSIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgcm9vdG5hbWU9IldNQiIpCmBgYAoKYGBge3IgY3JlYXRlIHBsb3QsIGZpZy53aWR0aD0xMSwgZmlnLmhlaWdodD0yLCBldmFsPVQsIGVjaG89VH0KcCA8LSBwbG90X2x5KCkgJT4lCiAgICAgIGFkZF90cmFjZShpZHMgPSBzdW5idXJzdERGJGlkcywKICAgICAgICAgICAgICAgIGxhYmVscyA9IHN1bmJ1cnN0REYkbGFiZWxzLAogICAgICAgICAgICAgICAgcGFyZW50cyA9c3VuYnVyc3RERiRwYXJlbnQsCiAgICAgICAgICAgICAgICB2YWx1ZXMgPSBzdW5idXJzdERGJHZhbHVlcywKICAgICAgICAgICAgICAgIHR5cGUgPSAnc3VuYnVyc3QnLAogICAgICAgICAgICAgICAgc29ydD1GQUxTRSwKICAgICAgICAgICAgICAgIG1hcmtlciA9IGxpc3QoY29sb3JzID0gc3VuYnVyc3RERiRjb2xvciksCiAgICAgICAgICAgICAgICBkb21haW4gPSBsaXN0KGNvbHVtbiA9IDEpLAogICAgICAgICAgICAgICAgYnJhbmNodmFsdWVzID0gJ3RvdGFsJwogICAgICAgICAgICAgICAgKSU+JQogICAgICBsYXlvdXQoZ3JpZCA9IGxpc3QoY29sdW1ucyA9MSwgcm93cyA9IDEpLAogICAgICAgICAgICAgIG1hcmdpbiA9IGxpc3QobCA9IDAsIHIgPSAwLCBiID0gMCwgdCA9IDApCiAgICAgICkKCgoKYGBgCgpgYGB7ciBwbG90IHRyZWUsIGZpZy53aWR0aD02LCBmaWcuaGVpZ2h0PTYsZWNobz1UfQpwCmBgYAoKCg==