我有一个电子表格,其中包含0到90000之间的数字列表,这些数字作为字符串存储在R列中。
每个由另一个系统分配的数字应该是唯一的。之前约有5%被使用过一次或多次。我无法控制其他系统。
每个月我在此列中添加大约50个数字。我需要确定列表中是否已经存在任何新数字(包括添加的新数字),并确定电子表格中包含第一个重复项然后包含每个后续重复项的行。
最终,我需要标识(例如):第51行是第一个包含字符串“ 000356”的行,它也出现在第357和745行中。
逐行进行搜索(在VBA中非常耗时(我目前有1000多行)。我将需要对超过3000行的列进行类似的搜索。
我的研究表明,使用VBA词典将是识别重复项的更快方法。
在下面的小型测试过程中,我无法使其正常运行,更具体地说,我需要确定电子表格中存在重复编号的行。
是否有更好的方法可以实现这一目标,下面如何修改我的测试代码?
' From Module M2A to test faster search methods
' Needs "Microsoft Scripting Runtime" enabled
Dim shtCFYsheet As Worksheet
Dim oFound As Boolean
Dim junk, actName As String
Dim lastrowCFYsheet As Long
Dim dictA As New Scripting.dictionary
Dim keyA, keyB As Variant
Set shtCFYsheet = Worksheets("Communify Sheet")
lastrowCFYsheet = shtCFYsheet.Cells(Rows.Count, "A").End(xlUp).Row
' Load up DictA with all the entries from Column R
For i = 2 To lastrowCFYsheet 'Row 1 contains headings
dictA(Trim(shtCFYsheet.Cells(i, "R").Value)) = 1
Next i
For Each keyA In dictA.Keys
junk = DoEvents()
oFound = False 'reset the flag for the next KeyA entry
EntryA = keyA ' Capture the DictA entry
'Search for the first DictA entry throughout the DictA dictionary
For Each keyB In dictA.Keys
EntryB = keyB ' Capture the DictB entry
'Test for a match
If Trim(EntryA) = Trim(EntryB) Then
If oFound = True Then Debug.Print "Match:" & EntryA, EntryB, "A-row " & _
dictA.Item(keyA), "B-row " & dictA.Item(keyB)
'Ignore first match as that's my own entry
oFound = True 'Now set flag so that next entry gets flagged as a duplicate
End If
Next keyB
Next keyA
结束子
具有两个重复项的样本数据:
2456
4863
4190
2123
5610
9061
2640
679
4702
7428
38
3082
4702
8391
8781
998
2091
3729
5610
5051
1796
3355
169
1788
8838
代码:
Option Explicit
Sub dupeRs()
Dim i As Long, arr As Variant, tmp As Variant
Dim dict As New Scripting.Dictionary
With Worksheets("Communify Sheet")
'load worksheet values into array
arr = .Range(.Cells(1, "R"), .Cells(Rows.Count, "R").End(xlUp)).Value
End With
'build dictionary
For i = 2 To UBound(arr, 1)
If dict.exists(arr(i, 1)) Then
tmp = dict.Item(arr(i, 1))
ReDim Preserve tmp(LBound(tmp) To UBound(tmp) + 1)
tmp(UBound(tmp)) = i
dict.Item(arr(i, 1)) = tmp
Else
dict.Item(arr(i, 1)) = Array(i)
End If
Next i
'optionally remove all non-duplicates
For Each tmp In dict.Keys
If UBound(dict.Item(tmp)) = 0 Then dict.Remove tmp
Next tmp
'debug.print the duplicates and row numbers
For Each tmp In dict.Keys
Debug.Print tmp & " in rows " & Join(dict.Item(tmp), ", ")
Next tmp
End Sub
结果:
005610 in rows 6, 20
004702 in rows 10, 14
您可以修改以下内容并尝试:
Option Explicit
Sub test()
Dim LastrowS1 As Long, LastrowS2 As Long, Times As Long, i As Long
Dim rng As Range, rngFound As Range
Dim str As String
'Find the last row of column A
LastrowS1 = Sheet1.Cells(Sheet1.Rows.Count, "A").End(xlUp).Row
'Start Loop from the lastrow to row 1 upside down
For i = 2 To LastrowS1
'Give value to str
str = Sheet1.Range("A" & i).Value
'Find the last row of column A
LastrowS2 = Sheet2.Cells(Sheet2.Rows.Count, "A").End(xlUp).Row
'Set the range you want to search
Set rng = Sheet2.Range("A2:A" & LastrowS2)
'Count how many times str appears in rng
Times = Application.WorksheetFunction.CountIf(rng, str)
'If it is appears more that one time
If Times > 0 Then
Set rngFound = rng.Find(str)
Sheet2.Cells(rngFound.Row, 2).Value = Sheet2.Cells(rngFound.Row, 2).Value & ", " & "Row" & " " & i
Else
Sheet2.Range("A" & LastrowS2 + 1).Value = str
Sheet2.Range("B" & LastrowS2 + 1).Value = "Row" & " " & i
End If
Next i
End Sub
Sheet 1:
下面的代码在工作表的右侧添加一列,并将行号写入其中。然后,它对列R中的数字字符串进行排序,从而在连续的行中加入重复项。然后检查如此更改的数字列是否存在连续的重复项,并将其行号(在步骤1中创建)记录在右侧添加的另一列中。最后,将数据按行号排序,恢复原始序列,并删除具有行号的列。标记有重复项的列保留在右侧。只有第一个出现的列表中存在重复的所有行号的列表。
要进行测试,请运行过程FindDuplicates
。请注意,可能必须重置代码顶部的两个枚举。在我的试验中,NwsFirstDataRow
为3(自上而下的第三行)。您的数据可能从第2行开始。包含数字的列设置为18(R列,从A = 1开始计数)。您可以指定任何其他列。
Option Explicit
Enum Nws ' Worksheet navigation
' 04 Mar 2019
NwsFirstDataRow = 3 ' assuming 2 caption rows above the data
NwsNumber = 18 ' column R would be 18
End Enum
Sub FindDuplicates()
' 04 Mar 2019
Dim Ws As Worksheet
Dim Rng As Range
Dim FreeClm As Long
Dim R As Long
' modify workbook definition and worksheet name as appropriate
Set Ws = ActiveWorkbook.Worksheets("Duplicates")
With Ws
Set Rng = .Range(.Cells(NwsFirstDataRow, NwsNumber), _
.Cells(.Rows.Count, NwsNumber).End(xlUp))
With .UsedRange
FreeClm = .Columns.Count + .Column
End With
Application.ScreenUpdating = False
WriteRowNumbers Rng, FreeClm
SortNumbers Ws, Rng, FreeClm
MarkDuplicates Ws, Rng, FreeClm
SortNumbers Ws, Rng.Offset(0, FreeClm - NwsNumber), FreeClm + 1
.Columns(FreeClm).Delete
Application.ScreenUpdating = True
End With
End Sub
Private Sub WriteRowNumbers(Rng As Range, _
C As Long)
' 04 Mar 2019
Dim Arr As Variant
Dim R As Long
ReDim Arr(1 To Rng.Rows.Count)
For R = 1 To UBound(Arr)
Arr(R) = Rng.Cells(R).Row
Next R
Rng.Offset(0, C - NwsNumber).Value = Application.Transpose(Arr)
End Sub
Private Sub SortNumbers(Ws As Worksheet, _
Rng As Range, _
C As Long)
' 04 Mar 2019
Dim SortRng As Range
With Ws
Set SortRng = .Range(.Cells(NwsFirstDataRow, 1), _
.Cells(NwsFirstDataRow + Rng.Rows.Count - 1, C))
End With
With Ws.Sort.SortFields
.Clear
.Add Key:=Rng, _
SortOn:=xlSortOnValues, _
Order:=xlAscending, _
DataOption:=xlSortTextAsNumbers
End With
With Ws.Sort
.SetRange SortRng
.Header = False
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
End Sub
Private Sub MarkDuplicates(Ws As Worksheet, _
Rng As Range, _
C As Long)
' 04 Mar 2019
Dim Spike As String
Dim Arr As Variant
Dim PrevNum As String, Rt As Long
Dim R As Long
Arr = Rng.Value
For R = 1 To UBound(Arr)
If Arr(R, 1) = PrevNum Then
Spike = Spike & ", " & Ws.Cells(R + NwsFirstDataRow - 1, C).Value
Else
If InStr(Spike, ",") Then Ws.Cells(Rt, C + 1).Value = Spike
Rt = R + NwsFirstDataRow - 1
Spike = Ws.Cells(Rt, C).Value
End If
PrevNum = Arr(R, 1)
Next R
End Sub