我有一个网络,网络中有一个焦点节点。我已经计算了从该节点到所有其他节点的最短路径,并且我试图弄清楚如何根据最短路径计算来制作原始网络的子图。
目标是能够显示从我的焦点节点到任何其他符合条件的节点的所有路径(例如路径小于 50m 或长于 100m 但短于 500m)。
library(sfnetworks)
library(sf)
library(tidygraph)
library(dplyr)
library(ggplot2)
# the base network
net <- as_sfnetwork(roxel, directed = FALSE) %>%
st_transform(3035) %>%
activate("edges") %>%
mutate(weight = edge_length())
# the network with the focal point
ggplot() +
geom_sf(data = net %>%
activate("edges") %>%
st_as_sf(), colour = "grey90") +
geom_sf(data = net %>%
activate("nodes") %>%
st_as_sf()) +
geom_sf(data = net %>%
activate("nodes") %>%
slice(495) %>%
st_as_sf(), size = 3.5, fill = "orange",
colour = "black", shape = 21) +
theme_void()
我可以使用
sfnetworks
包装器或直接从 igraph
: 获得最短路径
short_paths_sf <- st_network_paths(net %>% activate(nodes),
from = 495, to = c(1:701), weights = "weight")
short_paths_ig <- igraph::shortest_paths(
graph = net,
from = 495,
to = c(1:701),
output = "both",
weights = net %>% activate(edges) %>% pull(weight)
)
net
对象制作子图并仅包含最短路径和 2) 然后转换子图以便我可以对 whole path 的长度进行操作,而不仅仅是边的权重。我知道我可以用类似的东西得到两点之间单条路径的长度
single_path <- st_network_paths(net %>% activate(nodes),
from = 495,
to = 701,
weights = "weight") %>%
pull(edge_paths) %>%
unlist()
net %>%
activate(edges) %>%
slice(single_path) %>%
st_as_sf() %>%
st_combine() %>%
st_length()
> 482.0495 [m]
我可以用它来绘制
ggplot() +
geom_sf(data = net %>%
activate("edges") %>%
st_as_sf(), colour = "grey90") +
geom_sf(data = net %>%
activate("nodes") %>%
st_as_sf()) +
geom_sf(data = net %>%
activate("nodes") %>%
slice(495) %>%
st_as_sf(), size = 3.5, fill = "orange",
colour = "black", shape = 21) +
geom_sf(data = net %>%
activate(edges) %>%
slice(single_path) %>%
st_as_sf(), colour = "firebrick") +
theme_void()
但我正在努力弄清楚如何针对单对多情况执行此操作,然后特别是如何过滤这些路径,例如,我只能查看从点 495 开始的比短于500m.
我尝试应用作者here在最短路径部分中的建议:
sub_graph <- net %>%
igraph::subgraph.edges(eids = short_paths_ig$epath %>% unlist()) %>%
as_tbl_graph()
但是我不清楚这将如何显示我的最短路径?
非常感谢任何帮助!!
我花了一段时间找出我的第一个解决方案的更好版本(如下),所以这是一个 much 更快的方法(我没有逐步对最终解决方案进行基准测试,但它最终快了 50 倍以上)。
所以我们以完全相同的方式开始:
library(sfnetworks)
library(parallel)
library(tidyverse)
# Load the roxel dataset
net <- as_sfnetwork(roxel, directed = FALSE) %>%
st_transform(3035) %>%
activate("edges") %>%
mutate(weight = edge_length())
计算焦点节点到所有其他节点的最短路径,并保存边和节点路径:
paths = st_network_paths(net, from = 495, weights = "weight")
nodes_all <- paths %>%
pull(node_paths)
edges_all <- paths %>%
pull(edge_paths)
现在,最长的部分实际上是获取每条路径的长度以根据我们的标准进行检查,因此我将此过程放入一个函数中并将其并行化。首先是功能:
slice_fun <- function(net, temp_edges) {
#' Find path length of one path
#'
#' @description Taking a single path (i.e. set of edges), get the length
#' of that path, and remove the units on it
#' @param net sfnetwork. A network of sfnetwork
#' @param temp_edges vector. The edges of the path at hand
#'
#' @return numeric value (units removed) of the length of that path
return(units::drop_units(net %>%
activate("edges") %>%
slice(temp_edges) %>%
st_as_sf() %>%
st_combine() %>%
st_length()))
}
现在与
parSapply()
并行化(这是 最大 节省时间,因为所有 sf::
/sfnetworks::
函数的长度计算时间最长):
start_time <- Sys.time()
# start the cluster
cl <- parallel::makeCluster(8)
# the libraries need to be evaluated so they're available on all workers
parallel::clusterEvalQ(cl, {library(dplyr); library(sfnetworks);
library(magrittr); library(sf)})
# export the objects you need for the process
parallel::clusterExport(cl, varlist = c("edges_all", "net"))
# run in parallel
result <- parSapply(cl, edges_all, slice_fun, net = net)
parallel::stopCluster(cl)
end_time <- Sys.time()
end_time - start_time
# > Time difference of 14.49258 secs
现在,找到符合我们标准的
edges_all
的索引(这里,我们只想要长度超过1000m的路径),并通过这些索引过滤边缘,unlist()
所以所有的边缘都在一起,并且只保留唯一的边缘:
indices_keep <- which(result > 1000)
keep_edges <- unique(edges_all[indices_keep] %>% unlist())
如果我们想要路径末端的节点满足我们的要求,我们可以这样做:
# we can get the nodes if we want too
nodes_to_keep_all <- nodes_all[indices_keep]
nodes_to_keep <- unlist(lapply(nodes_to_keep_all, tail, n = 1L) %>% unlist())
现在剧情:
ggplot() +
geom_sf(data = net %>%
activate("edges") %>%
st_as_sf(), colour = "grey90") +
geom_sf(data = net %>%
activate("nodes") %>%
st_as_sf(), colour = "grey90") +
geom_sf(data = net %>%
activate("edges") %>%
slice(keep_edges) %>%
st_as_sf()
) +
geom_sf(data = net %>%
activate("nodes") %>%
slice(495) %>%
st_as_sf(), size = 3.5, fill = "orange", colour = "black", shape = 21) +
geom_sf(data = net %>%
activate("nodes") %>%
slice(nodes_to_keep) %>%
st_as_sf(), size = 2, fill = "red", colour = "black", shape = 21,
alpha = 0.4) +
theme_void()
好的,所以我想出了一个自定义解决方案,其中的标准是路径是否 > 1000m。
我们可以从获取我们的网络开始:
net = as_sfnetwork(roxel, directed = FALSE) %>%
st_transform(3035) %>%
activate("edges") %>%
mutate(weight = edge_length())
然后我们计算从焦点节点到所有其他节点的最短路径,并将该运行的
edge_paths
和node_paths
保存到对象以便于访问。
paths = st_network_paths(net, from = 495, weights = "weight")
# get nodes & edges both as paths (this results in a list format)
nodes_all <- paths %>%
pull(node_paths)
edges_all <- paths %>%
pull(edge_paths)
现在我写了一个我已经not优化的自定义函数,它根本不是很快,但它是我正在尝试做的事情的详细明确版本。该函数采用特定路径,确定该路径是否足够长以符合我的标准,如果是,则将边缘保留在该路径中。还有一个选项可以保留或省略那些足够长的路径的 end 处的节点。
功能:
len_crit <- function(net, edges, nodes = NULL) {
#' Look at whether or not each of the individual paths calcualted actually
#' pass the required test
#'
#' @description Look at whether or not each of the individual paths
#' calculated actually pass the required test
#' @param net sfnetwork. A network of sfnetwork
#' @param slice_val integer. The value to slice the edges into
#' @param edges list. The list of the edges in the shortest path
# initialize empty vector
all_edges <- as.numeric()
# initialize empty vector for nodes if applicable
if(!is.null(nodes)) {
all_nodes <- as.numeric()
}
# go through each of the slices (aka each of the paths)
for(slice in 1:length(edges)) {
# check what the temporary path length is
temp_len <- net %>%
activate("edges") %>%
slice(edges[[slice]]) %>%
st_as_sf() %>%
st_combine() %>%
st_length()
# if the temporary length is long enough, add the edges of that path to
# the total edges
if(temp_len > units::set_units(1000, m)) {
# if the length of the current path is long enough, add it to all_edges
all_edges <- c(all_edges, edges[[slice]])
# if we also want to plot the nodes, we can do so
if(!is.null(nodes)) {
# if the length is long enough, keep the LAST node in that set
all_nodes <- c(all_nodes, nodes[[slice]][[length(nodes[[slice]])]])
}
}
}
# if the nodes are selected return both that and the edges
if(!is.null(nodes)) {
# keep only the unique ones
unique_nodes <- unique(all_nodes)
# keep only the unique ones
unique_edges <- unique(all_edges)
# list up both
nodes_edges <- list(
nodes = unique_nodes,
edges = unique_edges
)
# return both
return(nodes_edges)
}
# keep only the unique ones
unique_edges <- unique(all_edges)
return(unique_edges)
}
现在我可以运行它了:
short_edges <- len_crit(net = net, edges = edges_all, nodes = nodes_all)
并通过切片网络来绘制它,分别使用节点和边缘的
activate
,并通过short_edges$nodes/edges
:
ggplot() +
geom_sf(data = net %>%
activate("edges") %>%
st_as_sf(), colour = "grey90") +
geom_sf(data = net %>%
activate("nodes") %>%
st_as_sf(), colour = "grey90") +
geom_sf(data = net %>%
activate("edges") %>%
slice(short_edges$edges) %>%
st_as_sf()
) +
geom_sf(data = net %>%
activate("nodes") %>%
slice(495) %>%
st_as_sf(), size = 3.5, fill = "orange", colour = "black", shape = 21) +
geom_sf(data = net %>%
activate("nodes") %>%
slice(short_edges$nodes) %>%
st_as_sf(), size = 2, fill = "red", colour = "black", shape = 21,
alpha = 0.4) +
theme_void()
这正是我想要的:)