((我正在编辑我的问题以使其可重复)。我的意思是我有一个函数,该函数执行许多操作以从一组数据(例如my_data)中找到均值,方差矩阵,T2样本值和控制限制。现在,我必须将T2值与控制限值进行比较,如果任何T2值大于控制限值,我想从数据集中删除整个样本(在主数据集中表示为一行),然后重新对其余样本执行我的操作(以便获得新的数据集),直到没有T2值大于限制。我需要存储均值和方差矩阵,因此我在函数中使用了<
## Implementation of the T2 C.C and EWMA-R c.c for the "On-line monitoring" article
## Creating a sample set of data for my online article; representing width as x_points and speed as y_points
width1 <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
width2 <- c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
width3 <- c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3)
corr_speed1 <- c(0.33, 0.37, 0.54, 0.52, 0.66, 0.64, 0.45, 0.66, 0.56, 0.33)
corr_speed2 <- c(0.47, 0.62, 0.57, 0.45, 0.53, 0.4, 0.49, 0.43, 0.38, 0.52)
corr_speed3 <- c(0.5, 0.37, 0.39, 0.72, 0.33, 0.54, 0.43, 0.35, 0.55, 0.41)
sample_boat <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
my_data <- data.frame(sample_boat, width1, width2, width3, corr_speed1, corr_speed2, corr_speed3)
T2_article_chart <- function(m){
while(nrow(m)>0){
x_data <- m[2:4]
y_data <- m[5:7]
mean_x_data <- mean(apply(x_data, MARGIN=1, mean))
mean_y_data <- apply(y_data, MARGIN=1, mean)
autocorr_x <- mean(apply(x_data, MARGIN=1, var)*(length(x_data)-1))
## Finding estimates as per sample sizes
diff_x <- x_data-mean_x_data
mult_yx <- y_data*diff_x
a1_m <- apply(mult_yx, MARGIN=1, sum)/autocorr_x
a0_m <- mean_y_data -a1_m*mean_x_data
resi_m <- y_data-a0_m-a1_m*x_data
square_resi <- resi_m^2
app_func_resi <- apply(square_resi, MARGIN=1, sum)
MSE_m <- app_func_resi/(length(x_data)-2)
MSE_all <- sum(MSE_m)/length(MSE_m)
var_a0_m <- MSE_all^2*(1/length(x_data)+mean_x_data^2*(1/autocorr_x))
var_a1_m <- MSE_all^2*(1/autocorr_x)
cov_a1_a0_m <- -(MSE_all^2*mean_x_data*(1/autocorr_x))
cov_a1_a0_m <- mean(cov_a1_a0_m)
## For all the samples j=1,2,...,k
# First we need to define the number of samples we have (represented by the number of MFC in our article)
k <- length(a0_m)
a0 <- sum(a0_m)/length(a0_m)
a1 <- sum(a1_m)/length(a1_m)
var_a0 <- var_a0_m/length(var_a0_m)
var_a1 <- var_a1_m/length(var_a1_m)
## We should now define the expected value and the var-cov matrix for each
# sample and find the sample statistic for our Tsquare control chart
# when expected mean and cov_var matrix are unknown for Phase I
z_m <- data.frame(a0_m, a1_m)
expected_value_U <<- c(a0, a1)
var_cov_matrix <<- matrix(c(var_a0, cov_a1_a0_m, cov_a1_a0_m, var_a1), nrow=2, ncol=2, byrow=TRUE)
#T2_m <- (z_m-expected_value_U)%*%solve(var_cov_matrix)%*%t(z_m-expected_value_U)*(k/(k-1))
subs_id1 <- vector()
subs_id2 <- vector()
for(i in 1:nrow(z_m)){
subs_id1[i] <- z_m$a0_m[i]-expected_value_U[1]
subs_id2[i] <- z_m$a1_m[i]-expected_value_U[2]
}
samp_mean_diffs <- data.frame(subs_id1,subs_id2)
T2_samples <- vector()
for(i in 1:nrow(samp_mean_diffs)){
T2_samples[i] <- as.matrix(samp_mean_diffs[i,])%*%solve(var_cov_matrix)%*%t(as.matrix(samp_mean_diffs[i,]))
}
T2_samples <- cbind(T2_samples)
# Control limits for T2 control chart
upper_control_limit_t2 <- 2*qf(0.95,2,(length(x_data)-2)*k)
for(i in 1:nrow(T2_samples)){
if(T2_samples[i]>upper_control_limit_t2){
m <- m[-c(i),]
repeat{T2_article_chart(m)}
break
}
}
}
}
T2_article_chart(my_data)
如果您的(x*2)^2 > 10
确实是过滤功能,则无需每次都重新计算x
:删除高行并返回。 (换句话说:一旦删除了引起问题的x
行,其他所有行都将不会产生任何不同。因此,请不要重复。)
yy <- function(x, cutoff = 10) {
ind <- rowSums( (x*2)^2 > cutoff ) > 0
return(x[!ind,])
}
yy(x)
# [,1] [,2]
# [1,] -0.560476 1.22408
# [2,] -0.230177 0.35981
# [3,] 1.558708 0.40077
# [4,] 0.070508 0.11068
# [5,] 0.129288 -0.55584
# [6,] 0.460916 0.49785
# [7,] -0.686853 0.70136
# [8,] -0.445662 -0.47279
但是,如果这只是一个占位符。这表示您的实际功能对您发送的矩阵的尺寸敏感...在这种情况下,这可能对您有用。
yy <- function(x, cutoff = 10) {
REDO <- TRUE
while (nrow(x) > 0 && REDO) {
multi_xx <- (x*2)^2
ind <- rowSums(multi_xx > cutoff) > 0
if (any(ind)) {
x <- x[!ind,]
REDO <- TRUE
} else REDO <- FALSE
}
return(x)
}
[通常,当我使用while
时,我尝试放入一个自限制代理,以使其永远不会迭代。例如,我经常定义iter <- 100
,然后使用while(REDO && iter > 0) { iter <- iter - 1; ...; REDO <- something; }
,以便它不会永远循环。但是,此循环的逻辑是,它将仅进行迭代,直到没有一个证据证明cutoff
条件或所有行均已删除。