此 SAS 代码对宽度和长度变量的均值和 CV 执行 10C9,有没有办法在相同的 SAS 代码中执行 10C8,直至 10C1?

问题描述 投票:0回答:1

此 SAS 代码对宽度和长度变量的均值和 CV 执行 10 个组合 9,有没有办法在同一 SAS 代码中执行 10 个组合 8,直至 10C1?

data yourdata;          
   input Obs sample_index $ width length;           
   datalines;           
1   Hand1   112.92  48.4075
2   Hand1   132.315 69.9522
3   Hand1   233.248 39.021
4   Hand1   282.457 41.0748
5   Hand1   111.281 31.0925
6   Hand1   197.349 82.4425
7   Hand1   118.723 41.2737
8   Hand1   293.993 22.9405
9   Hand1   272.179 83.9418
10  Hand1   204.086 69.144
11  Hand2   132.35  83.6597
12  Hand2   113.897 46.8513
13  Hand2   185.176 94.1936
14  Hand2   135.936 56.0927
15  Hand2   265.141 64.873
16  Hand2   227.11  80.9043
17  Hand2   265.468 86.7117
18  Hand2   249.726 64.1256
19  Hand2   124.951 43.8425
20  Hand2   285.231 66.0117
;           
            
%let max_combinations = 9; /* Maximum number of values in a combination */          
%let total_obs_hand1 = 10; /* Total number of Hand1 observations in my dataset */           
%let total_obs_hand2 = 10; /* Total number of Hand2 observations in my dataset */           
            
array width_vals[&max_combinations] width1-width&max_combinations;          
array length_vals[&max_combinations] length1-length&max_combinations;           
            
/* Calculate all possible combinations of 9 values for Hand1 */         
proc sql;           
   create table combinations_hand1 as           
   select distinct a.width as width1, b.width as width2, c.width as width3,         
                     d.width as width4, e.width as width5, f.width as width6,           
                     g.width as width7, h.width as width8, i.width as width9,           
                     a.length as length1, b.length as length2, c.length as length3,         
                     d.length as length4, e.length as length5, f.length as length6,         
                     g.length as length7, h.length as length8, i.length as length9          
   from yourdata as a, yourdata as b, yourdata as c, yourdata as d, yourdata as e,          
        yourdata as f, yourdata as g, yourdata as h, yourdata as i          
   where a.sample_index = 'Hand1' and b.sample_index = 'Hand1'          
     and c.sample_index = 'Hand1' and d.sample_index = 'Hand1'          
     and e.sample_index = 'Hand1' and f.sample_index = 'Hand1'          
     and g.sample_index = 'Hand1' and h.sample_index = 'Hand1'          
     and i.sample_index = 'Hand1'           
     and a.Obs < b.Obs and b.Obs < c.Obs and c.Obs < d.Obs and d.Obs < e.Obs            
     and e.Obs < f.Obs and f.Obs < g.Obs and g.Obs < h.Obs and h.Obs < i.Obs            
   ;            
quit;           
            
/* Calculate all possible combinations of 9 values for Hand2 */         
proc sql;           
   create table combinations_hand2 as           
   select distinct a.width as width1, b.width as width2, c.width as width3,         
                     d.width as width4, e.width as width5, f.width as width6,           
                     g.width as width7, h.width as width8, i.width as width9,           
                     a.length as length1, b.length as length2, c.length as length3,         
                     d.length as length4, e.length as length5, f.length as length6,         
                     g.length as length7, h.length as length8, i.length as length9          
   from yourdata as a, yourdata as b, yourdata as c, yourdata as d, yourdata as e,          
        yourdata as f, yourdata as g, yourdata as h, yourdata as i          
   where a.sample_index = 'Hand2' and b.sample_index = 'Hand2'          
     and c.sample_index = 'Hand2' and d.sample_index = 'Hand2'          
     and e.sample_index = 'Hand2' and f.sample_index = 'Hand2'          
     and g.sample_index = 'Hand2' and h.sample_index = 'Hand2'          
     and i.sample_index = 'Hand2'           
     and a.Obs < b.Obs and b.Obs < c.Obs and c.Obs < d.Obs and d.Obs < e.Obs            
     and e.Obs < f.Obs and f.Obs < g.Obs and g.Obs < h.Obs and h.Obs < i.Obs            
   ;            
quit;           
            
/* Calculate the mean width and length for each combination for Hand1 */            
data means_hand1;           
   set combinations_hand1;          
   num_values = &max_combinations;          
   width_mean = mean(of width1-width&max_combinations);         
   length_mean = mean(of length1-length&max_combinations);          
run;            
            
/* Calculate the mean width and length for each combination for Hand2 */            
data means_hand2;           
   set combinations_hand2;          
   num_values = &max_combinations;          
   width_mean = mean(of width1-width&max_combinations);         
   length_mean = mean(of length1-length&max_combinations);          
run;            
            
/* Calculate the coefficient of variation (CV) for width and length for Hand1 */            
data cv_hand1;          
   set means_hand1;         
   cv_width = (std(of width1-width&max_combinations) / width_mean) * 100;           
   cv_length = (std(of length1-length&max_combinations) / length_mean) * 100;           
run;            
            
/* Calculate the coefficient of variation (CV) for width and length for Hand2 */            
data cv_hand2;          
   set means_hand2;         
   cv_width = (std(of width1-width&max_combinations) / width_mean) * 100;           
   cv_length = (std(of length1-length&max_combinations) / length_mean) * 100;           
run;            
            
/* Display the resulting means and CVs for Hand1 */         
proc print data=cv_hand1;           
run;            
            
/* Display the resulting means and CVs for Hand2 */         
proc print data=cv_hand2;           
run;            

我需要创建调整 SAS 代码以将宽度和长度的所有可能的 10C9、10C8、10C7 降低到 10C1 组合,我将不胜感激。

sas combinations modeling
1个回答
0
投票

您可以使用 CALL LEXICOMB() 从 N 个值生成 K 值的所有组合。

您可以使用 PROC MEANS(又名 PROC SUMMARY)来计算变量的 MEAN 和 CV。

因此,使用数据步骤将组合生成到数据集中,然后使用 PROC SUMMARY 查找 CV。

当所有 BY 组都有相同数量的观察值时,这很简单。但是,如果它们具有不同数量的观察值(不同的 N 值),那么您将需要使用更复杂的代码或可能的代码生成(又名宏语言),以便您可以生成包含不同数量变量的 CALL LEXCOMB() 语句.

首先让我们制作一些以观察数作为变量的数据。并使用 PROC TRANSPOSE 将这些数字放入变量列表中。

data have;
  row+1; 
  input id :$5. width length;
datalines;           
Hand1   112.92  48.4075
Hand1   132.315 69.9522
Hand1   233.248 39.021
Hand1   282.457 41.0748
Hand1   111.281 31.0925
Hand1   197.349 82.4425
Hand1   118.723 41.2737
Hand1   293.993 22.9405
Hand1   272.179 83.9418
Hand1   204.086 69.144
Hand2   132.35  83.6597
Hand2   113.897 46.8513
Hand2   185.176 94.1936
Hand2   135.936 56.0927
Hand2   265.141 64.873
Hand2   227.11  80.9043
Hand2   265.468 86.7117
Hand2   249.726 64.1256
Hand2   124.951 43.8425
Hand2   285.231 66.0117
; 

proc transpose data=have out=wide(drop=_name_) prefix=row;
  by id;
  var row;
run;

对于这个例子,我们知道每个 BY 组有 10 个观察值。

%let n=10;

现在让我们使用 COMB() 和 CALL LEXICOMB() 来生成一次获取 9 个和 8 个观测值的所有可能组合。您只需将下限从 N-2 更改为 2 即可扩展到更多。

我们可以使用 SET 语句中的 POINT= 选项来重置观测值数组,并提取单个观测值的宽度和长度。

data comb(keep=id k i row:) tall(keep=id k i width length);
  do group=1 to nobs;
   set wide nobs=nobs;
   n=&n;
   do k=n-1 to n-2 by -1;
     set wide(keep=row1-row&n) point=group ;
     do i=1 to comb(n,k);
       call lexcomb(i,k,of row1-row&n);
       output comb;
       array row row1-row&n;
       do j=1 to k;
         p=row[j];
         set have(keep=width length) point=p;
         output tall;
       end;
     end;
   end;
  end;
run;

现在让我们计算各个组的宽度和长度的平均值和CV。

proc summary data=tall ;
  by id descending k i;
  var width length;
  output out=stats(drop=_type_) n= mean= cv= /autoname;
run;

并将结果与数据集和所使用的观察列表结合起来。让我们删掉未使用的观察数字,以便更清楚。

data want;
  merge stats comb;
  by id descending k i ;
  array row row1-row&n;
  do j=k+1 to &n;
    row[j]=.;
  end;
  drop j;
run;

结果

© www.soinside.com 2019 - 2024. All rights reserved.