基于引用表和单独的数据表创建矩阵

问题描述 投票:-1回答:1

我有一个参数化表,提到“矩阵1”的第(i,j)个元素是零,行和的残差还是必须从数据表中读取。我还有一个包含不同段的所有值的数据表。我如何构建矩阵?

例如,假设“param_table”是参数化表:

data param_table;
infile datalines dsd;
length FieldName $20 FieldSourceTable $20;
input Matrix_Id Column_Order Row_Order IsZero IsRowResidual IsColumnResidual FieldName FieldSourceTable;
datalines;
1,    1,   1,   0,  1,  0,  .,      .
1,    1,   2,   0,  0,  0,  xyz,    table1
1,    1,   3,   0,  0,  0,  abc,    table1
1,    2,   1,   1,  0,  0,  .,      .
1,    2,   2,   0,  0,  0,  pqr,    table1
1,    2,   3,   0,  0,  0,  mno,    table1
1,    3,   1,   0,  0,  0,  ab,     table1
1,    3,   2,   0,  0,  0,  pq,     table1
1,    3,   3,   0,  1,  0,  .,      .
;

“表1”是包含早期表中的值和引用的实际数据:

data table1;
input Year (country method Segment) ( : $12.)
  ABC XYZ PQR MNO AB PQ;
datalines;
2017 France ABC Retail    0.2 0.5 0.4 0.3 0.6 0.1
2017 France XYZ Corporate 0.1 0.5 0.4 0.2 0.6 0.2
;
run;

如何为表1中的每一行(每个键集)创建具有这些规则的矩阵?例如,“table1”的第1行的矩阵将是:

(1-ab) 0    ab
xyz    pqr  pq
abc    mno  (1-abc-mno)

(1,1)和(3,3)元素是行残差,因此是(1 - 行的其余部分的和),而(1,2)元素是0:

0.4  0    0.6
0.5  0.4  0.1
0.1  0.3  0.6

我已经为文件“param_table”添加了数据步骤,其中包含引用(列名称)以及它是零还是行残余。还添加了包含实际值的“table1”文件。对于“table1”的每一行,我们应该有一个基于param_table中提到的规则的矩阵。

谢谢!

sas sas-iml
1个回答
0
投票

param_table中定义的每个矩阵将对应于与table1中的每一行相关联的2-D阵列。假设您有宏matrixfier,它生成从table1数据映射到指定数组(即矩阵)所需的源代码语句。

%macro matrixfier (matrix_id=1, arrayName=x, out=);
  %local rowCount colCount source z i; 
  %local s1 s2 s3 s4 s addr;

宏必须检查参数数据,以确定它包含的设置在代码生成方面是否合理。

  proc sql noprint;
    select *
    from PARAM_TABLE where matrix_id = &matrix_id
    and ( iszero not in (0,1) or
          isrowresidual not in (0,1) or
          iscolumnresidual not in (0,1) or
          sum(iszero,isrowresidual,iscolumnresidual) not in (0,1)
    );

    %if &sqlobs %then %do;
      %put ERROR: Parameters for matrix_id=&matrix_id. rejected for is* settings.;
      %abort cancel;
    %end;
    select max(z) as z into :z from
    ( select column_order, sum(iscolumnresidual) as z
      from PARAM_TABLE where matrix_id = &matrix_id
      and iscolumnresidual
      group by column_order
    );

    %if &z > 1 %then  %do;
      %put ERROR: Parameters for matrix_id=&matrix_id. rejected for iscolumnresidual settings.;
      %abort cancel;
    %end;

确定目标数组需要多大。另外,假设每个矩阵只定义了一个源表

    select max(column_order), max(row_order), max(fieldsourcetable) 
    into :colCount, :rowCount, :source
    from PARAM_TABLE where matrix_id = &matrix_id
    ;

代码生成DATA Step语句以直接分配值。

    select cats("&arrayName.(",row_order,',',column_order,')=', fieldname)
    into :s1 separated by ';'
    from PARAM_TABLE where matrix_id = &matrix_id
    and iszero=0 and isrowresidual=0 and iscolumnresidual=0
    order by row_order, column_order
    ;

代码生成用于分配零值的DATA Step语句。

    select cats("&arrayName.(",row_order,',',column_order,')=0')
    into :s2 separated by ';'
    from PARAM_TABLE where matrix_id = &matrix_id
    and iszero
    order by row_order, column_order
    ;

代码生成用于计算行残差的DATA Step语句。

    %do i = 1 %to &rowCount;
      select
        cats(B.row_order,',',B.column_order),
        '-' || A.fieldname
      into
        :addr,
        :s separated by ','
      from PARAM_TABLE A
      join PARAM_TABLE B
        on A.matrix_id = B.matrix_id
           and A.row_order = B.row_order
      where
        A.matrix_id = &matrix_id and A.row_order=&i 
        and A.isrowresidual=0 and A.iszero=0 and A.iscolumnresidual=0
        and B.isrowresidual=1
      ;

      %if &sqlobs > 0 %then %let s3=&s3&arrayName.(&addr)=sum(1,&s)%str(;);
    %end;

代码生成用于计算列残差的DATA Step语句。

    %do i = 1 %to &colCount;
      select
        cats(B.row_order,',',B.column_order),
        '-' || A.fieldname
      into
        :addr,
        :s separated by ','
      from PARAM_TABLE A
      join PARAM_TABLE B
        on A.matrix_id = B.matrix_id
           and A.column_order = B.column_order
      where
        A.matrix_id = &matrix_id and A.column_order=&i 
        and A.isrowresidual=0 and A.iszero=0 and A.iscolumnresidual=0
        and B.iscolumnresidual=1
      ;

      %if &sqlobs > 0 %then %let s4=&s4&arrayName.(&addr)=sum(1,&s)%str(;);
    %end;

  quit;

在DATA步骤中汇编语句。

  data &out;
    set &source;

    array &arrayName.(&rowCount,&colCount);

    call missing (of &arrayName.(*));

    * assignments;
    &s1;
    * zeroes;
    &s2;
    * row residuals;
    &s3;
    * column residuals;
    &s4;

    * log the matrix for this row;
    do _i = 1 to dim(&arrayName.,1);
      do _j = 1 to dim(&arrayName.,2);
        putlog &arrayName(_i,_j) 6.2 @;
      end;
      putlog;
    end;
    putlog;

  run;
%mend;

解析应用于数据的参数

options mprint;
%matrixfier(matrix_id=1, arrayName=x, out=each);
© www.soinside.com 2019 - 2024. All rights reserved.