如何选择r中最小值和最大值之间尽可能均匀间隔的n个值?

问题描述 投票:0回答:1

考虑

n
结果 (gp) 的组 (
ngp
),如何选择/子集给定数量的结果 (
nesgp
),这些结果在最小值和最大值(都必须包括在内)之间尽可能均匀地间隔在新专栏中
selec

> print(dat, n=56)
# A tibble: 56 x 4
   gp    result   ngp nesgp
   <chr>  <dbl> <dbl> <dbl>
 1 CA      1.64    24    15
 2 CA      1.69    24    15
 3 CA      1.71    24    15
 4 CA      1.74    24    15
 5 CA      1.78    24    15
 6 CA      1.82    24    15
 7 CA      1.86    24    15
 8 CA      1.9     24    15
 9 CA      1.94    24    15
10 CA      1.98    24    15
11 CA      2.6     24    15
12 CA      2.65    24    15
13 CA      2.71    24    15
14 CA      2.76    24    15
15 CA      2.83    24    15
16 CA      2.89    24    15
17 CA      2.94    24    15
18 CA      3       24    15
19 CA      3.22    24    15
20 CA      3.42    24    15
21 CA      3.47    24    15
22 CA      3.68    24    15
23 CA      3.85    24    15
24 CA      4.38    24    15
25 ASAT    9       20    12
26 ASAT   11       20    12
27 ASAT   51       20    12
28 ASAT   61       20    12
29 ASAT   69       20    12
30 ASAT   78       20    12
31 ASAT   89       20    12
32 ASAT  102       20    12
33 ASAT  111       20    12
34 ASAT  120       20    12
35 ASAT  146       20    12
36 ASAT  163       20    12
37 ASAT  189       20    12
38 ASAT  208       20    12
39 ASAT  218       20    12
40 ASAT  304       20    12
41 ASAT  332       20    12
42 ASAT  345       20    12
43 ASAT  362       20    12
44 ASAT  402       20    12
45 ORO     0.56    12     8
46 ORO     0.7     12     8
47 ORO     0.77    12     8
48 ORO     0.78    12     8
49 ORO     0.82    12     8
50 ORO     0.82    12     8
51 ORO     0.92    12     8
52 ORO     0.94    12     8
53 ORO     1.16    12     8
54 ORO     1.46    12     8
55 ORO     1.54    12     8
56 ORO     1.77    12     8 

数据

dat <-
structure(list(gp = c("CA", "CA", "CA", "CA", "CA", "CA", "CA", 
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA", 
"CA", "CA", "CA", "CA", "CA", "CA", "ASAT", "ASAT", "ASAT", "ASAT", 
"ASAT", "ASAT", "ASAT", "ASAT", "ASAT", "ASAT", "ASAT", "ASAT", 
"ASAT", "ASAT", "ASAT", "ASAT", "ASAT", "ASAT", "ASAT", "ASAT", 
"ORO", "ORO", "ORO", "ORO", "ORO", "ORO", "ORO", "ORO", "ORO", 
"ORO", "ORO", "ORO"), result = c(1.64, 1.69, 1.71, 1.74, 1.78, 
1.82, 1.86, 1.9, 1.94, 1.98, 2.6, 2.65, 2.71, 2.76, 2.83, 2.89, 
2.94, 3, 3.22, 3.42, 3.47, 3.68, 3.85, 4.38, 9, 11, 51, 61, 69, 
78, 89, 102, 111, 120, 146, 163, 189, 208, 218, 304, 332, 345, 
362, 402, 0.56, 0.7, 0.77, 0.78, 0.82, 0.82, 0.92, 0.94, 1.16, 
1.46, 1.54, 1.77), ngp = c(24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 20, 
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, 20, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), 
    nesgp = c(15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 12, 12, 12, 
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
    12, 12, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -56L)) 

感谢您的帮助。

r select grouping evenly
1个回答
0
投票

以下解决方案使用了辅助功能

fun
,也许还有更简单的方法可以做到这一点。

suppressPackageStartupMessages(
  library(tidyverse)
)

fun <- function(x, n, na.rm = FALSE) {
  xmin <- min(x, na.rm = na.rm)
  xmax <- max(x, na.rm = na.rm)
  ref <- seq(xmin, xmax, length.out = n)
  x <- sort(x)
  j <- findInterval(ref, x)
  y <- numeric(n)
  y[1L] <- xmin
  y[n] <- xmax
  for(i in seq_len(n)[-c(1L, n)]) {
    if(abs(ref[i] - x[ j[i] ]) < abs(ref[i] - x[ j[i + 1L] ])) {
      y[i] <- x[ j[i] ]
    } else y[i] <- x[ j[i + 1L] ]
  }
  y
}
dat %>%
  reframe(selec = fun(result, first(ngp)), .by = gp)
#> # A tibble: 56 × 2
#>    gp    selec
#>    <chr> <dbl>
#>  1 CA     1.64
#>  2 CA     1.74
#>  3 CA     1.86
#>  4 CA     1.98
#>  5 CA     1.98
#>  6 CA     1.98
#>  7 CA     1.98
#>  8 CA     1.98
#>  9 CA     2.71
#> 10 CA     2.71
#> # ℹ 46 more rows

创建于 2024-02-04,使用 reprex v2.0.2

© www.soinside.com 2019 - 2024. All rights reserved.