优化匹配名称和日期/时间字段的完全外连接的提示

问题描述 投票:0回答:1

以下步骤对两个表执行 Full Outer Join,然后执行 Append 操作,每个表大约有 150 万条记录。

预览在我的工作站上刷新了 5 个小时,看不到尽头。我怎样才能使这些步骤更有效率?

/* 
pfFPTmcumerged -- I performed a Full Outer Join on the common fields 
[Hull Number], [Date], [Time] then Expanded all fields of the second 
table in the join. 
*/

[
    Source = Table.NestedJoin(pfFPTenginemcu, {"Hull Number", "Date", "Time"}, pfFPTswbdmcu, {"Hull Number", "Date", "Time"}, "pfFPTswbdmcu", JoinKind.FullOuter),
    #"Expanded Ref Step" = Table.ExpandTableColumn(Source, "pfFPTswbdmcu", {"Date", "Time", "Hull Number", "40F_101T01", "40F_201T01", "40F_301T01", "40I_101T01", "40I_102T01", "40I_201T01", "40I_202T01", "40U_101T01", "40U_102T01", "40U_103T01", "40U_201T01", "40U_202T01", "40U_203T01", "40U_301T01", "41AZ016T03", "41BP003T03", "41BP007T03", "41BP008T03", "41BP012T03", "41BS001T01", "41BS010T03", "41BT005T03", "41BT009T03", "41BT011T03", "41BT013T03", "41F_001T01", "41I_001T01", "41P_001T01", "41PF001T01", "41Q_001T01", "41S_001T01", "41U_001T01", "41U_014T03", "42AZ016T03", "42BP003T03", "42BP007T03", "42BP008T03", "42BP012T03", "42BS001T01", "42BS010T03", "42BT005T03", "42BT009T03", "42BT011T03", "42BT013T03", "42F_001T01", "42I_001T01", "42P_001T01", "42PF001T01", "42Q_001T01", "42S_001T01", "42U_001T01", "42U_014T03", "43BS001T01", "43F_001T01", "43I_001T01", "43P_001T01", "43PF001T01", "43Q_001T01", "43S_001T01", "43U_001T01"}, {"Date.1", "Time.1", "Hull Number.1", "40F_101T01", "40F_201T01", "40F_301T01", "40I_101T01", "40I_102T01", "40I_201T01", "40I_202T01", "40U_101T01", "40U_102T01", "40U_103T01", "40U_201T01", "40U_202T01", "40U_203T01", "40U_301T01", "41AZ016T03", "41BP003T03", "41BP007T03", "41BP008T03", "41BP012T03", "41BS001T01", "41BS010T03", "41BT005T03", "41BT009T03", "41BT011T03", "41BT013T03", "41F_001T01", "41I_001T01", "41P_001T01", "41PF001T01", "41Q_001T01", "41S_001T01", "41U_001T01", "41U_014T03", "42AZ016T03", "42BP003T03", "42BP007T03", "42BP008T03", "42BP012T03", "42BS001T01", "42BS010T03", "42BT005T03", "42BT009T03", "42BT011T03", "42BT013T03", "42F_001T01", "42I_001T01", "42P_001T01", "42PF001T01", "42Q_001T01", "42S_001T01", "42U_001T01", "42U_014T03", "43BS001T01", "43F_001T01", "43I_001T01", "43P_001T01", "43PF001T01", "43Q_001T01", "43S_001T01", "43U_001T01"})
]

/*
pfFPTmcumergedLeft -- Referencing the full expanded table, I pulled 
the "left" part of the table/records where the common fields had a 
match between the two original tables by filtering one of the common 
fields for <> null. Then just removed the redundant common fields from 
the second table.
*/

let
    Source = pfFPTmcumerged,
    #"Expanded Ref Step" = Source[Expanded Ref Step],
    #"Left of Merge" = Table.SelectRows(#"Expanded Ref Step", each [Date] <> null),
    #"Remove Redundant SWBD Date Time Hull" = Table.RemoveColumns(#"Left of Merge",{"Date.1", "Time.1", "Hull Number.1"})
in
    #"Remove Redundant SWBD Date Time Hull"

/*
pfFPTmcumergedRight -- Referencing the full expanded table, I pulled 
the "right" part of the table/records where the common fields 
did not match between the two original tables by filtering one of 
the common fields for = null. Then kept only the fields of the 
original second table (and renamed them to match the names of the 
common fields of the "Left" table. Now, I only have one set of the 
common fields in the "Left" and "Right" tables.
*/

let
    Source = pfFPTmcumerged,
    #"Expanded Ref Step" = Source[Expanded Ref Step],
    #"Right of Merge" = Table.SelectRows(#"Expanded Ref Step", each [Date] = null),
    #"Keep Only SWBD" = Table.SelectColumns(#"Right of Merge",{"Date.1", "Time.1", "Hull Number.1", "40F_101T01", "40F_201T01", "40F_301T01", "40I_101T01", "40I_102T01", "40I_201T01", "40I_202T01", "40U_101T01", "40U_102T01", "40U_103T01", "40U_201T01", "40U_202T01", "40U_203T01", "40U_301T01", "41AZ016T03", "41BP003T03", "41BP007T03", "41BP008T03", "41BP012T03", "41BS001T01", "41BS010T03", "41BT005T03", "41BT009T03", "41BT011T03", "41BT013T03", "41F_001T01", "41I_001T01", "41P_001T01", "41PF001T01", "41Q_001T01", "41S_001T01", "41U_001T01", "41U_014T03", "42AZ016T03", "42BP003T03", "42BP007T03", "42BP008T03", "42BP012T03", "42BS001T01", "42BS010T03", "42BT005T03", "42BT009T03", "42BT011T03", "42BT013T03", "42F_001T01", "42I_001T01", "42P_001T01", "42PF001T01", "42Q_001T01", "42S_001T01", "42U_001T01", "42U_014T03", "43BS001T01", "43F_001T01", "43I_001T01", "43P_001T01", "43PF001T01", "43Q_001T01", "43S_001T01", "43U_001T01"}),
    #"Renamed Date Time Hull" = Table.RenameColumns(#"Keep Only SWBD",{{"Date.1", "Date"}, {"Time.1", "Time"}, {"Hull Number.1", "Hull Number"}})
in
    #"Renamed Date Time Hull"

/*
fMCUTrend -- Finally, just appended the "Left" and "Right" tables, 
one on top of the other.
*/

let
    Source = Table.Combine({pfFPTmcumergedLeft, pfFPTmcumergedRight})
in
    Source
powerbi powerquery powerbi-desktop m
1个回答
0
投票

我没有尝试过 1.5m 行。但操作列表可能比操作表格更好:

enter image description here

let
  Source = Excel.CurrentWorkbook(),
  pfFPTenginemcu = Table.Buffer(Source{[Name = "pfFPTenginemcu"]}[Content]),
  pfFPTswbdmcu = Table.Buffer(Source{[Name = "pfFPTswbdmcu"]}[Content]),
  // keys in left table
  keys1 = List.Zip({List.Buffer(pfFPTenginemcu[Hull Number]), List.Buffer(pfFPTenginemcu[Date]), List.Buffer(pfFPTenginemcu[Time])}),
  // keys in right table
  keys2 = List.Zip({List.Buffer(pfFPTswbdmcu[Hull Number]), List.Buffer(pfFPTswbdmcu[Date]), List.Buffer(pfFPTswbdmcu[Time])}),
  // find common value in keys1 and keys2
  junction = List.Intersect({keys1, keys2}),
  // find the "left part" of the left table
  included1 = List.Transform(keys1, each List.Contains(junction, _)),
  indices1 = List.Positions(included1),
  excludedIndices1 = List.Select(indices1, each included1{_} = false),
  // combine the "left part" of left table with right table
  rows1 = List.Buffer(Table.ToRows(pfFPTenginemcu)),
  rows2 = List.Buffer(Table.ToRows(pfFPTswbdmcu)),
  excludedRows1 = List.Transform(excludedIndices1, each rows1{_}),
  combinedRows = Table.FromRows(excludedRows1 & rows2)
in
  combinedRows
© www.soinside.com 2019 - 2024. All rights reserved.