我有以下数据框(完整数据见下文的 dput() 输出):
Date Time Price Volume VolumeSEK Bid.Price Ask.Price BidAskSpread MidPrice Company
1005 11.09.2018 25204.72 98.500 1153 113570.500 98.58 98.58 0.00 98.580 SEB
1071 11.09.2018 25209.89 233.300 158 36861.400 233.30 233.80 0.50 233.550 Alfa Laval
88995 12.09.2018 25220.83 170.500 101 17220.500 170.50 170.60 0.10 170.550 Skanska
1115 11.09.2018 25224.86 233.300 1 233.300 233.30 233.70 0.40 233.500 Alfa Laval
89001 12.09.2018 25229.77 96.960 937 90851.520 96.96 97.04 0.08 97.000 SEB
259224 14.09.2018 25239.65 213.950 126 26957.700 214.00 214.20 0.20 214.100 Swedbank
329555 17.09.2018 25244.28 178.375 19 3389.125 178.35 178.60 0.25 178.475 Skanska
1177 11.09.2018 25248.27 233.400 127 29641.800 233.30 233.60 0.30 233.450 Alfa Laval
1197 11.09.2018 25256.45 286.600 267 76522.200 286.60 287.10 0.50 286.850 Kinnevik
1200 11.09.2018 25258.17 98.520 32 3152.640 98.30 98.38 0.08 98.340 SEB
和两个向量
# Companies and calendar days for which the averages are wanted.
Comp <- c(
  "Skanska", "SEB", "Swedbank",
  "Kinnevik", "Investor", "Alfa Laval"
)
# Seven consecutive days, 11.09.2018 through 17.09.2018, as dd.mm.yyyy strings.
Day <- paste0(11:17, ".09.2018")
我想计算每一天每只股票的平均 VolumeSEK,并将结果保存在矩阵中。我是 R 新手,所以首先想到的是使用 for 循环。但到目前为止,这种做法对我不起作用。下面是我的尝试,不过我有点迷茫,这可能不是正确的方法。
# Mean VolumeSEK for every company (rows) and day (columns).
# Corrections to the first attempt:
#   * removed the stray closing parenthesis after matrix(...), a syntax error;
#   * the loops run over seq_along(Comp) / seq_along(Day), because 1:Comp and
#     1:Day fail on character vectors;
#   * the matrix is sized from the vectors and labelled with their values.
mat <- matrix(
  NA_real_,
  nrow = length(Comp),
  ncol = length(Day),
  dimnames = list(Comp, Day)
)
for (i in seq_along(Comp)) {
  for (j in seq_along(Day)) {
    rows <- df$Company == Comp[i] & df$Date == Day[j]
    # mean() of an empty selection is NaN, i.e. no rows for that pair.
    mat[i, j] <- mean(df$VolumeSEK[rows])
  }
}
任何反馈和提示都非常感谢。提前致谢!
df<-structure(list(X.RIC = structure(c(8L, 2L, 10L, 2L, 8L, 12L,
10L, 2L, 6L, 8L, 12L, 4L, 6L, 8L, 6L, 2L, 6L, 8L, 12L, 4L, 6L,
8L, 12L, 12L, 4L, 4L, 4L, 12L, 4L, 12L, 12L, 4L, 12L, 4L, 4L,
8L, 6L, 12L, 4L, 4L, 6L, 10L, 4L, 10L, 12L, 12L, 8L, 4L, 6L,
8L), .Label = c("ALFA.ST", "ALFAs.BCO", "INVEb.ST", "INVEBs.BCO",
"KINVb.ST", "KINVBs.BCO", "SEBa.ST", "SEBAs.BCO", "SKAb.ST",
"SKABs.BCO", "SWEDa.ST", "SWEDAs.BCO"), class = "factor"), Date = structure(c(1L,
1L, 2L, 1L, 2L, 4L, 5L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 3L, 2L, 5L,
1L, 1L, 1L, 4L, 2L, 1L, 3L, 2L, 1L, 1L, 5L, 1L, 1L, 5L, 1L, 5L,
1L, 1L, 5L, 5L, 1L, 1L, 1L, 4L, 5L, 1L, 2L, 4L, 1L, 3L, 4L, 5L,
5L), .Label = c("11.09.2018", "12.09.2018", "13.09.2018", "14.09.2018",
"17.09.2018"), class = "factor"), Time = c(25204.724866253, 25209.891063318,
25220.83, 25224.862743496, 25229.77, 25239.65, 25244.28, 25248.266841503,
25256.450392157, 25258.169598025, 25259.431887444, 25265.42,
25267.73, 25282.608168894, 25297.72, 25300.78, 25304.39, 25312.181336031,
25314.992406965, 25334.129581998, 25337.19, 25337.52, 25338.977745285,
25339.14, 25340.48, 25341.34500136, 25346.804459672, 25347.23,
25351.80572164, 25352.089646376, 25354.56, 25356.805147054, 25359.55,
25361.804327741, 25366.804555871, 25370.11, 25372.53, 25378.384314178,
25378.884337058, 25386.788916974, 25388.64, 25389.67, 25392.033315652,
25401.17, 25403.9, 25421.773090991, 25421.98, 25424.19, 25424.21,
25424.85), Type = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Quote",
"Trade"), class = "factor"), Price = c(98.5, 233.3, 170.5, 233.3,
96.96, 213.95, 178.375, 233.4, 286.6, 98.52, 213.1, 409.75, 290.9,
98.42, 291.5, 235, 288.8, 98.4, 213.2, 407.8, 291.3, 96.78, 213,
212.3, 406.48, 407.9, 407.9, 212.8, 407.9, 213, 212.8, 407.9,
212.8, 407.9, 407.9, 96.86, 288.5, 213, 407.7, 407.9, 291.3,
178.7, 407.8, 170.9, 213.9, 212.9, 96.46, 409.7, 288.3, 96.88
), Volume = c(1153L, 158L, 101L, 1L, 937L, 126L, 19L, 127L, 267L,
32L, 64L, 17L, 31L, 733L, 100L, 130L, 51L, 46L, 214L, 21L, 78L,
155L, 55L, 761L, 295L, 121L, 6L, 113L, 5L, 350L, 4L, 5L, 3L,
6L, 5L, 711L, 567L, 350L, 13L, 8L, 4L, 110L, 587L, 607L, 1272L,
363L, 13L, 419L, 63L, 21L), Venue = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "SINT[GV5_TEXT]", class = "factor"),
VolumeSEK = c(113570.5, 36861.4, 17220.5, 233.3, 90851.52,
26957.7, 3389.125, 29641.8, 76522.2, 3152.64, 13638.4, 6965.75,
9017.9, 72141.86, 29150, 30550, 14728.8, 4526.4, 45624.8,
8563.8, 22721.4, 15000.9, 11715, 161560.3, 119911.6, 49355.9,
2447.4, 24046.4, 2039.5, 74550, 851.2, 2039.5, 638.4, 2447.4,
2039.5, 68867.46, 163579.5, 74550, 5300.1, 3263.2, 1165.2,
19657, 239378.6, 103736.3, 272080.8, 77282.7, 1253.98, 171664.3,
18162.9, 2034.48), Bid.Price = c(98.58, 233.3, 170.5, 233.3,
96.96, 214, 178.35, 233.3, 286.6, 98.3, 212.8, 409.6, 290.9,
98.38, 291.5, 235, 288.6, 98.38, 213.1, 407.5, 291.3, 96.76,
213, 212.1, 406.2, 407.5, 407.5, 212.8, 407.5, 213, 212.8,
407.5, 212.8, 407.5, 407.5, 96.88, 288.5, 213.1, 407.3, 407.6,
291.6, 178.6, 407.5, 170.75, 213.8, 213, 96.46, 409.3, 288.2,
96.88), Ask.Price = c(98.58, 233.8, 170.6, 233.7, 97.04,
214.2, 178.6, 233.6, 287.1, 98.38, 213, 410, 291.1, 98.42,
291.7, 235.4, 289, 98.46, 213.3, 407.8, 291.5, 96.78, 213.2,
212.4, 406.5, 407.9, 407.9, 213.1, 407.9, 213.1, 213.1, 407.9,
213.1, 407.9, 407.9, 96.98, 288.8, 213.2, 407.8, 407.9, 291.9,
178.75, 407.8, 170.95, 213.9, 213.1, 96.54, 409.6, 288.6,
96.96), BidAskSpread = c(0, 0.5, 0.1, 0.399999999999977,
0.08, 0.2, 0.25, 0.299999999999983, 0.5, 0.0799999999999983,
0.199999999999989, 0.4, 0.2, 0.0400000000000063, 0.2, 0.4,
0.4, 0.0799999999999983, 0.200000000000017, 0.300000000000011,
0.2, 0.02, 0.199999999999989, 0.3, 0.3, 0.399999999999977,
0.399999999999977, 0.3, 0.399999999999977, 0.0999999999999943,
0.3, 0.399999999999977, 0.3, 0.399999999999977, 0.399999999999977,
0.1, 0.3, 0.0999999999999943, 0.5, 0.299999999999955, 0.3,
0.15, 0.300000000000011, 0.2, 0.1, 0.0999999999999943, 0.08,
0.3, 0.4, 0.08), MidPrice = c(98.58, 233.55, 170.55, 233.5,
97, 214.1, 178.475, 233.45, 286.85, 98.34, 212.9, 409.8,
291, 98.4, 291.6, 235.2, 288.8, 98.42, 213.2, 407.65, 291.4,
96.77, 213.1, 212.25, 406.35, 407.7, 407.7, 212.95, 407.7,
213.05, 212.95, 407.7, 212.95, 407.7, 407.7, 96.93, 288.65,
213.15, 407.55, 407.75, 291.75, 178.675, 407.65, 170.85,
213.85, 213.05, 96.5, 409.45, 288.4, 96.92), Company = structure(c(4L,
1L, 5L, 1L, 4L, 6L, 5L, 1L, 3L, 4L, 6L, 2L, 3L, 4L, 3L, 1L,
3L, 4L, 6L, 2L, 3L, 4L, 6L, 6L, 2L, 2L, 2L, 6L, 2L, 6L, 6L,
2L, 6L, 2L, 2L, 4L, 3L, 6L, 2L, 2L, 3L, 5L, 2L, 5L, 6L, 6L,
4L, 2L, 3L, 4L), .Label = c("Alfa Laval", "Investor", "Kinnevik",
"SEB", "Skanska", "Swedbank"), class = "factor")), .Names = c("X.RIC",
"Date", "Time", "Type", "Price", "Volume", "Venue", "VolumeSEK",
"Bid.Price", "Ask.Price", "BidAskSpread", "MidPrice", "Company"
), row.names = c(1005L, 1071L, 88995L, 1115L, 89001L, 259224L,
329555L, 1177L, 1197L, 1200L, 1201L, 259266L, 89158L, 1253L,
178546L, 89199L, 329638L, 1312L, 1319L, 1369L, 259339L, 89245L,
1383L, 178643L, 89249L, 1385L, 1388L, 329712L, 1401L, 1404L,
329722L, 1412L, 329729L, 1418L, 1421L, 329762L, 329771L, 1437L,
1443L, 1471L, 259393L, 329810L, 1485L, 89373L, 259439L, 1532L,
178820L, 259511L, 329870L, 329871L), class = "data.frame")
data.table方法,转换为宽格式
library(data.table)
# Mean VolumeSEK per (Company, Date), restricted to the companies in Comp and
# the days in Day, then cast to wide format: one row per company, one column
# per date.
# as.data.table() works on a copy, so df itself stays a plain data.frame;
# setDT() would have converted it in place.
# value.var is given explicitly so dcast() does not have to guess it.
dcast(
  as.data.table(df)[
    Company %in% Comp & Date %in% Day,
    .(mean = mean(VolumeSEK)),
    by = .(Company, Date)
  ],
  Company ~ Date,
  value.var = "mean"
)
# Company 11.09.2018 12.09.2018 13.09.2018 14.09.2018 17.09.2018
# 1: Alfa Laval 22245.50 30550.00 NA NA NA
# 2: Investor 31687.49 119911.60 NA 89315.02 NA
# 3: Kinnevik 76522.20 9017.90 29150.00 11943.30 65490.40
# 4: SEB 48347.85 52926.21 1253.98 NA 35450.97
# 5: Skanska NA 60478.40 NA NA 11523.06
# 6: Swedbank 49560.15 NA 161560.30 149519.25 8512.00
将公式 Company ~ Date 换成 Date ~ Company,即可进行相反方向的转换(即把公司作为列)。
使用基本R:
# Mean VolumeSEK for every Date/Company pair, in long format.
# The result is stored as df2 because the reshaping steps further down read it
# under that name; the bare df2 line prints it.
df2 <- aggregate(VolumeSEK ~ Date + Company, data = df, FUN = mean)
df2
Date Company VolumeSEK
1 11.09.2018 Alfa Laval 22245.50
2 12.09.2018 Alfa Laval 30550.00
3 11.09.2018 Investor 31687.49
4 12.09.2018 Investor 119911.60
5 14.09.2018 Investor 89315.02
6 11.09.2018 Kinnevik 76522.20
7 12.09.2018 Kinnevik 9017.90
8 13.09.2018 Kinnevik 29150.00
9 14.09.2018 Kinnevik 11943.30
10 17.09.2018 Kinnevik 65490.40
11 11.09.2018 SEB 48347.85
12 12.09.2018 SEB 52926.21
13 13.09.2018 SEB 1253.98
14 17.09.2018 SEB 35450.97
15 12.09.2018 Skanska 60478.40
16 17.09.2018 Skanska 11523.06
17 11.09.2018 Swedbank 49560.15
18 13.09.2018 Swedbank 161560.30
19 14.09.2018 Swedbank 149519.25
20 17.09.2018 Swedbank 8512.00
这会计算每个公司与日期组合的 VolumeSEK 平均值。结果是长格式;如果您需要宽格式:
# Long -> wide: one row per Company, one VolumeSEK.<Date> column per date.
reshape(
  data = df2,
  direction = "wide",
  idvar = "Company",
  timevar = "Date"
)
Company VolumeSEK.11.09.2018 VolumeSEK.12.09.2018 VolumeSEK.14.09.2018 VolumeSEK.13.09.2018 VolumeSEK.17.09.2018
1 Alfa Laval 22245.50 30550.00 NA NA NA
3 Investor 31687.49 119911.60 89315.02 NA NA
6 Kinnevik 76522.20 9017.90 11943.30 29150.00 65490.40
11 SEB 48347.85 52926.21 NA 1253.98 35450.97
15 Skanska NA 60478.40 NA NA 11523.06
17 Swedbank 49560.15 NA 149519.25 161560.30 8512.00
其中 df2 是上面 aggregate() 汇总的结果。
对于最后一步,您也可以使用 reshape2 包,执行:
library(reshape2)
# Wide table of VolumeSEK: one row per Company, one column per Date.
dcast(
  data = df2,
  formula = Company ~ Date,
  value.var = "VolumeSEK"
)
Company 11.09.2018 12.09.2018 13.09.2018 14.09.2018 17.09.2018
1 Alfa Laval 22245.50 30550.00 NA NA NA
2 Investor 31687.49 119911.60 NA 89315.02 NA
3 Kinnevik 76522.20 9017.90 29150.00 11943.30 65490.40
4 SEB 48347.85 52926.21 1253.98 NA 35450.97
5 Skanska NA 60478.40 NA NA 11523.06
6 Swedbank 49560.15 NA 161560.30 149519.25 8512.00
这是一个更简单的命令,并提供更清晰的结果。
这是使用 tidyverse 包的解决方案。请注意,此处不需要循环:
library(tidyverse)
# Mean VolumeSEK per Date/Company pair, then one column per company.
df %>%
  as_tibble() %>%
  group_by(Date, Company) %>%
  summarise(x = mean(VolumeSEK)) %>%
  ungroup() %>%
  # pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps
  # the company columns in sorted order, as spread() produced them.
  pivot_wider(names_from = Company, values_from = x, names_sort = TRUE)
# A tibble: 5 x 7
Date `Alfa Laval` Investor Kinnevik SEB Skanska Swedbank
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 11.09.2018 22246. 31687. 76522. 48348. NA 49560.
2 12.09.2018 30550 119912. 9017.9 52926. 60478. NA
3 13.09.2018 NA NA 29150 1254.0 NA 161560.
4 14.09.2018 NA 89315. 11943. NA NA 149519.
5 17.09.2018 NA NA 65490. 35451. 11523. 8512