如何在R中使用基于两个向量的条件运行for循环

问题描述 投票:0回答:3

我有以下数据框(参见下面的dput()):

                   Date     Time   Price Volume  VolumeSEK Bid.Price Ask.Price BidAskSpread MidPrice    Company
1005   11.09.2018 25204.72  98.500   1153 113570.500     98.58     98.58         0.00   98.580        SEB
1071   11.09.2018 25209.89 233.300    158  36861.400    233.30    233.80         0.50  233.550 Alfa Laval
88995  12.09.2018 25220.83 170.500    101  17220.500    170.50    170.60         0.10  170.550    Skanska
1115   11.09.2018 25224.86 233.300      1    233.300    233.30    233.70         0.40  233.500 Alfa Laval
89001  12.09.2018 25229.77  96.960    937  90851.520     96.96     97.04         0.08   97.000        SEB
259224 14.09.2018 25239.65 213.950    126  26957.700    214.00    214.20         0.20  214.100   Swedbank
329555 17.09.2018 25244.28 178.375     19   3389.125    178.35    178.60         0.25  178.475    Skanska
1177   11.09.2018 25248.27 233.400    127  29641.800    233.30    233.60         0.30  233.450 Alfa Laval
1197   11.09.2018 25256.45 286.600    267  76522.200    286.60    287.10         0.50  286.850   Kinnevik
1200   11.09.2018 25258.17  98.520     32   3152.640     98.30     98.38         0.08   98.340        SEB

和两个向量

# Companies to report on.
Comp <- c(
  "Skanska", "SEB", "Swedbank",
  "Kinnevik", "Investor", "Alfa Laval"
)
# Dates to report on, written as "dd.mm.yyyy" strings.
Day <- c(
  "11.09.2018", "12.09.2018", "13.09.2018", "14.09.2018",
  "15.09.2018", "16.09.2018", "17.09.2018"
)

我想计算每一天每只股票的平均VolumeSEK,并将结果保存在矩阵中。我是R新手,所以首先想到的是使用for循环。但到目前为止,这并没有成功。下面是我的做法,但我有点不知所措,这可能不是正确的方法。

    # Asker's attempt: fill a 6 x 7 matrix (6 = length of Comp, 7 = length of Day)
    # with the mean VolumeSEK of the rows matching each company/date pair.
    # NOTE(review): the next line has one more ")" than "(", so it does not parse.
    mat <- matrix(, nrow = 6, ncol = 7))
# NOTE(review): Comp and Day are character vectors, so 1:Comp and 1:Day do not
# yield index sequences; seq_along(Comp) / seq_along(Day) would.
for (i in 1:Comp){
  for(j in 1:Day){
    # Mean of VolumeSEK over the rows where both Company and Date match.
    mat[i,j]= mean(df$VolumeSEK[df$Company==Comp[i]& df$Date==Day[j]])
  }#2
}#1

任何反馈和提示都非常感谢。提前致谢!

df<-structure(list(X.RIC = structure(c(8L, 2L, 10L, 2L, 8L, 12L, 
    10L, 2L, 6L, 8L, 12L, 4L, 6L, 8L, 6L, 2L, 6L, 8L, 12L, 4L, 6L, 
    8L, 12L, 12L, 4L, 4L, 4L, 12L, 4L, 12L, 12L, 4L, 12L, 4L, 4L, 
    8L, 6L, 12L, 4L, 4L, 6L, 10L, 4L, 10L, 12L, 12L, 8L, 4L, 6L, 
    8L), .Label = c("ALFA.ST", "ALFAs.BCO", "INVEb.ST", "INVEBs.BCO", 
    "KINVb.ST", "KINVBs.BCO", "SEBa.ST", "SEBAs.BCO", "SKAb.ST", 
    "SKABs.BCO", "SWEDa.ST", "SWEDAs.BCO"), class = "factor"), Date = structure(c(1L, 
    1L, 2L, 1L, 2L, 4L, 5L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 3L, 2L, 5L, 
    1L, 1L, 1L, 4L, 2L, 1L, 3L, 2L, 1L, 1L, 5L, 1L, 1L, 5L, 1L, 5L, 
    1L, 1L, 5L, 5L, 1L, 1L, 1L, 4L, 5L, 1L, 2L, 4L, 1L, 3L, 4L, 5L, 
    5L), .Label = c("11.09.2018", "12.09.2018", "13.09.2018", "14.09.2018", 
    "17.09.2018"), class = "factor"), Time = c(25204.724866253, 25209.891063318, 
    25220.83, 25224.862743496, 25229.77, 25239.65, 25244.28, 25248.266841503, 
    25256.450392157, 25258.169598025, 25259.431887444, 25265.42, 
    25267.73, 25282.608168894, 25297.72, 25300.78, 25304.39, 25312.181336031, 
    25314.992406965, 25334.129581998, 25337.19, 25337.52, 25338.977745285, 
    25339.14, 25340.48, 25341.34500136, 25346.804459672, 25347.23, 
    25351.80572164, 25352.089646376, 25354.56, 25356.805147054, 25359.55, 
    25361.804327741, 25366.804555871, 25370.11, 25372.53, 25378.384314178, 
    25378.884337058, 25386.788916974, 25388.64, 25389.67, 25392.033315652, 
    25401.17, 25403.9, 25421.773090991, 25421.98, 25424.19, 25424.21, 
    25424.85), Type = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Quote", 
    "Trade"), class = "factor"), Price = c(98.5, 233.3, 170.5, 233.3, 
    96.96, 213.95, 178.375, 233.4, 286.6, 98.52, 213.1, 409.75, 290.9, 
    98.42, 291.5, 235, 288.8, 98.4, 213.2, 407.8, 291.3, 96.78, 213, 
    212.3, 406.48, 407.9, 407.9, 212.8, 407.9, 213, 212.8, 407.9, 
    212.8, 407.9, 407.9, 96.86, 288.5, 213, 407.7, 407.9, 291.3, 
    178.7, 407.8, 170.9, 213.9, 212.9, 96.46, 409.7, 288.3, 96.88
    ), Volume = c(1153L, 158L, 101L, 1L, 937L, 126L, 19L, 127L, 267L, 
    32L, 64L, 17L, 31L, 733L, 100L, 130L, 51L, 46L, 214L, 21L, 78L, 
    155L, 55L, 761L, 295L, 121L, 6L, 113L, 5L, 350L, 4L, 5L, 3L, 
    6L, 5L, 711L, 567L, 350L, 13L, 8L, 4L, 110L, 587L, 607L, 1272L, 
    363L, 13L, 419L, 63L, 21L), Venue = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "SINT[GV5_TEXT]", class = "factor"), 
        VolumeSEK = c(113570.5, 36861.4, 17220.5, 233.3, 90851.52, 
        26957.7, 3389.125, 29641.8, 76522.2, 3152.64, 13638.4, 6965.75, 
        9017.9, 72141.86, 29150, 30550, 14728.8, 4526.4, 45624.8, 
        8563.8, 22721.4, 15000.9, 11715, 161560.3, 119911.6, 49355.9, 
        2447.4, 24046.4, 2039.5, 74550, 851.2, 2039.5, 638.4, 2447.4, 
        2039.5, 68867.46, 163579.5, 74550, 5300.1, 3263.2, 1165.2, 
        19657, 239378.6, 103736.3, 272080.8, 77282.7, 1253.98, 171664.3, 
        18162.9, 2034.48), Bid.Price = c(98.58, 233.3, 170.5, 233.3, 
        96.96, 214, 178.35, 233.3, 286.6, 98.3, 212.8, 409.6, 290.9, 
        98.38, 291.5, 235, 288.6, 98.38, 213.1, 407.5, 291.3, 96.76, 
        213, 212.1, 406.2, 407.5, 407.5, 212.8, 407.5, 213, 212.8, 
        407.5, 212.8, 407.5, 407.5, 96.88, 288.5, 213.1, 407.3, 407.6, 
        291.6, 178.6, 407.5, 170.75, 213.8, 213, 96.46, 409.3, 288.2, 
        96.88), Ask.Price = c(98.58, 233.8, 170.6, 233.7, 97.04, 
        214.2, 178.6, 233.6, 287.1, 98.38, 213, 410, 291.1, 98.42, 
        291.7, 235.4, 289, 98.46, 213.3, 407.8, 291.5, 96.78, 213.2, 
        212.4, 406.5, 407.9, 407.9, 213.1, 407.9, 213.1, 213.1, 407.9, 
        213.1, 407.9, 407.9, 96.98, 288.8, 213.2, 407.8, 407.9, 291.9, 
        178.75, 407.8, 170.95, 213.9, 213.1, 96.54, 409.6, 288.6, 
        96.96), BidAskSpread = c(0, 0.5, 0.1, 0.399999999999977, 
        0.08, 0.2, 0.25, 0.299999999999983, 0.5, 0.0799999999999983, 
        0.199999999999989, 0.4, 0.2, 0.0400000000000063, 0.2, 0.4, 
        0.4, 0.0799999999999983, 0.200000000000017, 0.300000000000011, 
        0.2, 0.02, 0.199999999999989, 0.3, 0.3, 0.399999999999977, 
        0.399999999999977, 0.3, 0.399999999999977, 0.0999999999999943, 
        0.3, 0.399999999999977, 0.3, 0.399999999999977, 0.399999999999977, 
        0.1, 0.3, 0.0999999999999943, 0.5, 0.299999999999955, 0.3, 
        0.15, 0.300000000000011, 0.2, 0.1, 0.0999999999999943, 0.08, 
        0.3, 0.4, 0.08), MidPrice = c(98.58, 233.55, 170.55, 233.5, 
        97, 214.1, 178.475, 233.45, 286.85, 98.34, 212.9, 409.8, 
        291, 98.4, 291.6, 235.2, 288.8, 98.42, 213.2, 407.65, 291.4, 
        96.77, 213.1, 212.25, 406.35, 407.7, 407.7, 212.95, 407.7, 
        213.05, 212.95, 407.7, 212.95, 407.7, 407.7, 96.93, 288.65, 
        213.15, 407.55, 407.75, 291.75, 178.675, 407.65, 170.85, 
        213.85, 213.05, 96.5, 409.45, 288.4, 96.92), Company = structure(c(4L, 
        1L, 5L, 1L, 4L, 6L, 5L, 1L, 3L, 4L, 6L, 2L, 3L, 4L, 3L, 1L, 
        3L, 4L, 6L, 2L, 3L, 4L, 6L, 6L, 2L, 2L, 2L, 6L, 2L, 6L, 6L, 
        2L, 6L, 2L, 2L, 4L, 3L, 6L, 2L, 2L, 3L, 5L, 2L, 5L, 6L, 6L, 
        4L, 2L, 3L, 4L), .Label = c("Alfa Laval", "Investor", "Kinnevik", 
        "SEB", "Skanska", "Swedbank"), class = "factor")), .Names = c("X.RIC", 
    "Date", "Time", "Type", "Price", "Volume", "Venue", "VolumeSEK", 
    "Bid.Price", "Ask.Price", "BidAskSpread", "MidPrice", "Company"
    ), row.names = c(1005L, 1071L, 88995L, 1115L, 89001L, 259224L, 
    329555L, 1177L, 1197L, 1200L, 1201L, 259266L, 89158L, 1253L, 
    178546L, 89199L, 329638L, 1312L, 1319L, 1369L, 259339L, 89245L, 
    1383L, 178643L, 89249L, 1385L, 1388L, 329712L, 1401L, 1404L, 
    329722L, 1412L, 329729L, 1418L, 1421L, 329762L, 329771L, 1437L, 
    1443L, 1471L, 259393L, 329810L, 1485L, 89373L, 259439L, 1532L, 
    178820L, 259511L, 329870L, 329871L), class = "data.frame")
r loops for-loop condition
3个回答
1
投票

data.table方法,转换为宽格式

library(data.table)

# Mean VolumeSEK per Company/Date pair, restricted to the companies in Comp and
# the dates in Day. Note: setDT() converts df to a data.table in place.
vol_mean <- setDT(df)[
  Company %in% Comp & Date %in% Day,
  .(mean = mean(VolumeSEK)),
  by = .(Company, Date)
]

# Long -> wide: one row per Company, one column per Date. value.var is given
# explicitly so dcast() does not have to guess the value column.
dcast(vol_mean, Company ~ Date, value.var = "mean")

#       Company 11.09.2018 12.09.2018 13.09.2018 14.09.2018 17.09.2018
# 1: Alfa Laval   22245.50   30550.00         NA         NA         NA
# 2:   Investor   31687.49  119911.60         NA   89315.02         NA
# 3:   Kinnevik   76522.20    9017.90   29150.00   11943.30   65490.40
# 4:        SEB   48347.85   52926.21    1253.98         NA   35450.97
# 5:    Skanska         NA   60478.40         NA         NA   11523.06
# 6:   Swedbank   49560.15         NA  161560.30  149519.25    8512.00

将公式Company ~ Date换成Date ~ Company,即可得到相反方向的宽表(即每个公司占一列)。


1
投票

使用基本R:

# Mean VolumeSEK for every Date/Company pair present in df (long format).
# The result is stored as df2 because the reshaping steps further down take
# df2 as their input; it is then printed.
df2 <- aggregate(VolumeSEK ~ Date + Company, data = df, FUN = mean)
df2

         Date    Company VolumeSEK
1  11.09.2018 Alfa Laval  22245.50
2  12.09.2018 Alfa Laval  30550.00
3  11.09.2018   Investor  31687.49
4  12.09.2018   Investor 119911.60
5  14.09.2018   Investor  89315.02
6  11.09.2018   Kinnevik  76522.20
7  12.09.2018   Kinnevik   9017.90
8  13.09.2018   Kinnevik  29150.00
9  14.09.2018   Kinnevik  11943.30
10 17.09.2018   Kinnevik  65490.40
11 11.09.2018        SEB  48347.85
12 12.09.2018        SEB  52926.21
13 13.09.2018        SEB   1253.98
14 17.09.2018        SEB  35450.97
15 12.09.2018    Skanska  60478.40
16 17.09.2018    Skanska  11523.06
17 11.09.2018   Swedbank  49560.15
18 13.09.2018   Swedbank 161560.30
19 14.09.2018   Swedbank 149519.25
20 17.09.2018   Swedbank   8512.00

这会计算每个公司和日期组合的VolumeSEK平均值。结果是长格式;如果您需要宽格式:

# Long -> wide with base R: one row per Company and one VolumeSEK.<Date>
# column per distinct Date in df2.
reshape(
  df2,
  direction = "wide",
  idvar = "Company",
  timevar = "Date"
)

      Company VolumeSEK.11.09.2018 VolumeSEK.12.09.2018 VolumeSEK.14.09.2018 VolumeSEK.13.09.2018 VolumeSEK.17.09.2018
1  Alfa Laval             22245.50             30550.00                   NA                   NA                   NA
3    Investor             31687.49            119911.60             89315.02                   NA                   NA
6    Kinnevik             76522.20              9017.90             11943.30             29150.00             65490.40
11        SEB             48347.85             52926.21                   NA              1253.98             35450.97
15    Skanska                   NA             60478.40                   NA                   NA             11523.06
17   Swedbank             49560.15                   NA            149519.25            161560.30              8512.00

其中df2是上面汇总的结果。

对于最后一步,您还可以使用reshape2库并执行:

library(reshape2)

# Long -> wide: rows are companies, columns are dates, cells hold VolumeSEK.
dcast(
  df2,
  formula = Company ~ Date,
  value.var = "VolumeSEK"
)

     Company 11.09.2018 12.09.2018 13.09.2018 14.09.2018 17.09.2018
1 Alfa Laval   22245.50   30550.00         NA         NA         NA
2   Investor   31687.49  119911.60         NA   89315.02         NA
3   Kinnevik   76522.20    9017.90   29150.00   11943.30   65490.40
4        SEB   48347.85   52926.21    1253.98         NA   35450.97
5    Skanska         NA   60478.40         NA         NA   11523.06
6   Swedbank   49560.15         NA  161560.30  149519.25    8512.00

这是一个更简单的命令,并提供更清晰的结果。


0
投票

这是tidyverse包的解决方案。请注意,此处不需要循环:

library(tidyverse)

# Mean VolumeSEK per Date/Company pair, then one column per Company.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE orders the
# new columns by the Company values rather than by first appearance.
df %>%
  as_tibble() %>%
  group_by(Date, Company) %>%
  summarise(x = mean(VolumeSEK)) %>%
  ungroup() %>%
  pivot_wider(names_from = Company, values_from = x, names_sort = TRUE)

# A tibble: 5 x 7
  Date       `Alfa Laval` Investor Kinnevik     SEB Skanska Swedbank
  <fct>             <dbl>    <dbl>    <dbl>   <dbl>   <dbl>    <dbl>
1 11.09.2018       22246.   31687.  76522.  48348.      NA    49560.
2 12.09.2018       30550   119912.   9017.9 52926.   60478.      NA 
3 13.09.2018          NA       NA   29150    1254.0     NA   161560.
4 14.09.2018          NA    89315.  11943.     NA       NA   149519.
5 17.09.2018          NA       NA   65490.  35451.   11523.    8512 
© www.soinside.com 2019 - 2024. All rights reserved.