Question

我在 R 中有这个图形网络：

library(igraph)
n_rows <- 10
n_cols <- 5
g <- make_lattice(dimvector = c(n_cols, n_rows))

layout <- layout_on_grid(g, width = n_cols)

n_nodes <- vcount(g)
node_colors <- rep("white", n_nodes)

for (row in 0:(n_rows-1)) {
    start_index <- row * n_cols + 1
    node_colors[start_index:(start_index+2)] <- "orange"  
    node_colors[(start_index+3):(start_index+4)] <- "purple"    
}

node_labels <- 1:n_nodes

plot(g, 
     layout = layout, 
     vertex.color = node_colors,
     vertex.label = node_labels,
     vertex.label.color = "black",
     vertex.size = 15,
     edge.color = "gray",
     main = "Rectangular Undirected Network")

我正在尝试编写一个函数，将该网络随机分成 5 个连通的子图（即迷你图），使得每个节点恰好出现一次。

我认为从理论上讲，这应该不太难。我需要随机识别一个节点，随机决定要包含多少个邻居，选择这些邻居并将它们从图中删除……然后在剩余的图上重新启动此过程。当然，需要指定一些其他细节，例如，如果指定的随机数超过剩余节点的数量，则使用最大函数，需要使用 BFS 来选择节点等。

这是我第一次尝试编写代码：

get_connected_subgraph <- function(graph, available_nodes, min_nodes = 5, max_nodes = 15) {
    if (length(available_nodes) == 0) return(NULL)
    
    start_node <- sample(available_nodes, 1)
    
    bfs_result <- bfs(graph, root = start_node, unreachable = FALSE, order = TRUE, rank = TRUE, father = TRUE)
    
    bfs_order <- intersect(bfs_result$order, available_nodes)
    
    n_subgraph_nodes <- min(sample(min_nodes:max_nodes, 1), length(bfs_order))
    
    subgraph_nodes <- bfs_order[1:n_subgraph_nodes]
    
    return(subgraph_nodes)
}

create_5_subgraphs <- function(graph) {
    available_nodes <- V(graph)
    subgraphs <- list()
    
    for (i in 1:5) {
        subgraph_nodes <- get_connected_subgraph(graph, available_nodes)
        if (is.null(subgraph_nodes)) break
        
        subgraphs[[i]] <- subgraph_nodes
        available_nodes <- setdiff(available_nodes, subgraph_nodes)
    }
    
    return(subgraphs)
}

set.seed(42) 
subgraphs <- create_5_subgraphs(g)

subgraph_colors <- c("red", "blue", "green", "yellow", "purple")

node_subgraph_colors <- rep("lightgray", vcount(g))
for (i in 1:length(subgraphs)) {
    node_subgraph_colors[subgraphs[[i]]] <- subgraph_colors[i]
}

edge_subgraph_colors <- rep("lightgray", ecount(g))
for (i in 1:length(subgraphs)) {
    subgraph_edges <- E(g)[.inc(subgraphs[[i]])]
    edge_subgraph_colors[subgraph_edges] <- subgraph_colors[i]
}

plot(g, 
     layout = layout,
     vertex.color = node_subgraph_colors,
     vertex.label = node_labels,
     vertex.label.color = "black",
     vertex.size = 15,
     edge.color = edge_subgraph_colors,
     edge.width = 2,
     main = "Network with 5 Separate Connected Subgraphs")

上述结果看起来几乎正确，但黄色节点（例如 29）似乎违反了连通性。

关于如何修复此问题有什么指示吗？

我编写了一些可选代码来比较前后情况：

node_info <- data.frame(
    Node_Index = 1:vcount(g),
    Original_Color = node_colors,
    New_Color = node_subgraph_colors
)

get_subgraph_number <- function(node) {
    subgraph_num <- which(sapply(subgraphs, function(x) node %in% x))
    if (length(subgraph_num) == 0) return(NA)
    return(subgraph_num)
}

node_info$Subgraph_Number <- sapply(node_info$Node_Index, get_subgraph_number)

head(node_info)

为了补充 jblood94 的惊人答案，这里有一个与 jblood94 的答案一起使用的快速绘图功能：

library(igraph)
library(data.table)

f <- function(g, n) {
    m <- length(g)
    dt <- setDT(as_data_frame(g))
    dt <- rbindlist(list(dt, dt[,.(from = to, to = from)]))
    dt[,group := 0L]
    used <- logical(m)
    s <- sample(m, n)
    used[s] <- TRUE
    m <- m - n
    dt[from %in% s, group := .GRP, from]
    
    while (m) {
        dt2 <- unique(
            dt[group != 0L & !used[to], .(grow = to, onto = group)][sample(.N)],
            by = "grow"
        )
        dt[dt2, on = .(from = grow), group := onto]
        used[dt2[[1]]] <- TRUE
        m <- m - nrow(dt2)
    }
    
    unique(dt[,to := NULL])[,.(vertices = .(from)), group]
}


plot_multiple_subgraphs <- function(n_plots = 25, n_rows = 10, n_cols = 5, n_subgraphs = 5) {
    g <- make_lattice(dimvector = c(n_cols, n_rows))
    layout <- layout_on_grid(g, width = n_cols)
    n_nodes <- vcount(g)
    
    color_palette <- c("red", "blue", "green", "yellow", "purple")
    
    par(mfrow = c(5, 5), mar = c(0.5, 0.5, 2, 0.5))
    
    for (i in 1:n_plots) {
        subgraphs <- f(g, n_subgraphs)
        
        node_colors <- rep("white", n_nodes)
        
        for (j in 1:nrow(subgraphs)) {
            nodes <- unlist(subgraphs$vertices[j])
            node_colors[nodes] <- color_palette[j]
        }
        
        plot(g, 
             layout = layout, 
             vertex.color = node_colors,
             vertex.label = NA,  
             vertex.size = 15,   
             edge.color = "gray",
             edge.width = 0.5,  
             main = paste("Partition", i),  
             cex.main = 0.8)     
    }
}

plot_multiple_subgraphs()

如果不需要子图大小约束，您可以直接删除边直到count_components(g_sub) == 5。我认为您的问题是您在原始图上执行 BFS 并取与剩余节点的交集。您可以先创建一个仅具有可用顶点的新图，然后对其进行 BFS。 — 
@n1000：非常感谢！根据尺寸约束，您的意思是…每个子图的最小尺寸？ — 
是的。我认为min_nodes=5这是对允许的子图分区的限制。 —

Accepted Answer

这是一个函数，它n从图中随机选择顶点g作为每个组的初始子图成员n，然后迭代地“增长”每个组，直到所有顶点都在子图中。

library(data.table)

f <- function(g, n) {
  m <- length(g)
  dt <- setDT(as_data_frame(g))
  dt <- rbindlist(list(dt, dt[,.(from = to, to = from)]))
  dt[,group := 0L]
  used <- logical(m)
  s <- sample(m, n)
  used[s] <- TRUE
  m <- m - n
  dt[from %in% s, group := .GRP, from]
  
  while (m) {
    dt2 <- unique(
      dt[group != 0L & !used[to], .(grow = to, onto = group)][sample(.N)],
      by = "grow"
    )
    dt[dt2, on = .(from = grow), group := onto]
    used[dt2[[1]]] <- TRUE
    m <- m - nrow(dt2)
  }
  
  unique(dt[,to := NULL])[,.(vertices = .(from), .N), group]
}

在 OP 的图表上演示：

set.seed(907044864)
f(g, 5L)
#>    group              vertices     N
#>    <int>                <list> <int>
#> 1:     1       1,2,3,6,7,8,...     9
#> 2:     2  4, 5, 9,10,13,14,...    13
#> 3:     3 21,22,26,27,31,36,...     9
#> 4:     4 23,28,29,32,33,38,...    10
#> 5:     5 30,34,35,39,40,44,...     9

注意：在迭代过程中，如果多个组尝试“生长”到同一个顶点，则随机选择获胜组。这是[sample(.N)]在使用找到所有候选生长后使用完成的dt[group != 0L & !used[to], .(grow = to, onto = group)]。

性能检查

测试将 100×100 的网格分成 10 组的性能：

system.time(dt <- f(make_lattice(c(100, 100)), 10))
#>    user  system elapsed 
#>    0.16    0.02    0.17
dt
#>     group                          vertices     N
#>     <int>                            <list> <int>
#>  1:     4                   1,2,3,4,5,6,...  2329
#>  2:     2             43,44,45,46,47,48,...  1093
#>  3:     1             87,88,89,90,91,92,...    99
#>  4:     3       695,696,697,795,796,797,...   380
#>  5:     5 1551,1552,1553,1554,1650,1651,...  1363
#>  6:     6 3171,3172,3173,3174,3175,3176,...  1048
#>  7:     7 5921,5922,5923,5924,5925,5926,...  2377
#>  8:     8 6169,6171,6269,6270,6271,6272,...   339
#>  9:     9 6475,6575,6576,6675,6676,6677,...   264
#> 10:    10 7980,7981,7982,7983,7984,7985,...   708

非常感谢！我发布了一个可视化代码作为补充 — 
太棒了！我想看看分区是什么样子的，但又不想编写可视化代码。 —

Answer 2

和：

library(igraph, warn.conflicts = FALSE)

n_rows <- 10
n_cols <- 5

g <- make_lattice(dimvector = c(n_cols, n_rows))

# Voronoi partitioning
set.seed(42)
V(g)$sub <- voronoi_cells(g, sample(V(g), 5), tiebreaker = "first")$membership

# summary
V(g)$names <- V(g)
as_data_frame(g, what = "vertices") |> 
  dplyr::summarise(names = list(names), size = lengths(names), .by = sub)
#>   sub                                                  names size
#> 1   2                              1, 2, 3, 6, 7, 11, 12, 16    8
#> 2   4                             4, 5, 8, 9, 10, 13, 14, 15    8
#> 3   1 17, 21, 22, 26, 27, 28, 31, 32, 33, 36, 37, 38, 41, 42   14
#> 4   3                     18, 19, 20, 23, 24, 25, 29, 30, 35    9
#> 5   0             34, 39, 40, 43, 44, 45, 46, 47, 48, 49, 50   11

withr::with_par(
  list(mar = c(0, 0, 0, 0)),
  plot(g, 
       layout = layout_on_grid(g, width = n_cols), 
       # membership IDs are 0-based, hence +1 to subset colors
       vertex.color = c("red", "blue", "green", "yellow", "purple")[V(g)$sub + 1],
       vertex.label.color = "black",
       vertex.size = 15,
       edge.color = "gray")
)

^{创建于 2024-09-13，使用}

使用恒定的生成器向量（顺序也起作用）和非随机决胜局，它们就是（随机可能不适用于小图）。当使用相同的节点序列调用时，您是否需要它是随机的？ —

Answer 3

我想说你的bfs方法是一个很好的开始，你可以使用bfs下面的方法

nrsubg <- 5
gg <- g <- g %>%
  set_vertex_attr("name", value = seq.int(vcount(.)))
szsubg <- diff(sort(c(0, vcount(g), sample(vcount(g) - 1, nrsubg - 1))))
vlst <- setNames(vector("list", nrsubg), seq.int(nrsubg))
for (i in seq_along(szsubg)) {
  vlst[[i]] <- names(head(bfs(gg, sample(V(gg)[which.min(degree(gg))], 1))$order, szsubg[i]))
  gg <- induced_subgraph(gg, V(gg)[!names(V(gg)) %in% vlst[[i]]])
}
g %>%
  set_vertex_attr("color", value = with(stack(vlst), ind[match(names(V(.)), values)]))

其中每个连接的“迷你图”的大小是随机的。

可视化

g %>% 
  plot(
    layout = layout,
    vertex.label = V(.)$name,
    vertex.label.color = "black",
    vertex.size = 15,
    edge.color = "gray",
    main = "Rectangular Undirected Network"
  )

例如显示如下所示的内容

非常感谢你的精彩回答！你对这个问题有什么想法吗？ —

r – 随机将图表拆分为迷你图表 – VoidCC

最佳答案
3

性能检查

可视化

最佳答案 3

性能检查

可视化

最佳答案
3