一列中重复的条目的行

问题描述 投票:1回答:3

我有一个电子表格,其中包含0到90000之间的数字列表,这些数字作为字符串存储在R列中。

每个由另一个系统分配的数字应该是唯一的。之前约有5%被使用过一次或多次。我无法控制其他系统。

每个月我在此列中添加大约50个数字。我需要确定列表中是否已经存在任何新数字(包括添加的新数字),并确定电子表格中包含第一个重复项然后包含每个后续重复项的行。

最终,我需要标识(例如):第51行是第一个包含字符串“ 000356”的行,它也出现在第357和745行中。

逐行进行搜索(在VBA中非常耗时(我目前有1000多行)。我将需要对超过3000行的列进行类似的搜索。

我的研究表明,使用VBA词典将是识别重复项的更快方法。

在下面的小型测试过程中,我无法使其正常运行,更具体地说,我需要确定电子表格中存在重复编号的行。

是否有更好的方法可以实现这一目标,下面如何修改我的测试代码?

'   From Module M2A to test faster search methods
'   Needs "Microsoft Scripting Runtime" enabled

Dim shtCFYsheet As Worksheet
Dim oFound As Boolean
Dim junk, actName As String
Dim lastrowCFYsheet As Long
Dim dictA As New Scripting.dictionary
Dim keyA, keyB As Variant

Set shtCFYsheet = Worksheets("Communify Sheet")
lastrowCFYsheet = shtCFYsheet.Cells(Rows.Count, "A").End(xlUp).Row

'   Load up DictA with all the entries from Column R

For i = 2 To lastrowCFYsheet 'Row 1 contains headings
    dictA(Trim(shtCFYsheet.Cells(i, "R").Value)) = 1
Next i

For Each keyA In dictA.Keys
    junk = DoEvents()
    oFound = False 'reset the flag for the next KeyA entry

    EntryA = keyA ' Capture the DictA entry

    'Search for the first DictA entry throughout the DictA dictionary
    For Each keyB In dictA.Keys
        EntryB = keyB ' Capture the DictB entry
        'Test for a match
        If Trim(EntryA) = Trim(EntryB) Then
            If oFound = True Then Debug.Print "Match:" & EntryA, EntryB, "A-row " & _
              dictA.Item(keyA), "B-row " & dictA.Item(keyB)
            'Ignore first match as that's my own entry
            oFound = True 'Now set flag so that next entry gets flagged as a duplicate
        End If
    Next keyB
Next keyA

结束子

具有两个重复项的样本数据:

2456
4863
4190
2123
5610
9061
2640
679
4702
7428
38
3082
4702
8391
8781
998
2091
3729
5610
5051
1796
3355
169
1788
8838
excel vba
3个回答
2
投票

代码:

Option Explicit

Sub dupeRs()

    Dim i As Long, arr As Variant, tmp As Variant
    Dim dict As New Scripting.Dictionary

    With Worksheets("Communify Sheet")

        'load worksheet values into array
        arr = .Range(.Cells(1, "R"), .Cells(Rows.Count, "R").End(xlUp)).Value

    End With

    'build dictionary
    For i = 2 To UBound(arr, 1)
        If dict.exists(arr(i, 1)) Then
            tmp = dict.Item(arr(i, 1))
            ReDim Preserve tmp(LBound(tmp) To UBound(tmp) + 1)
            tmp(UBound(tmp)) = i
            dict.Item(arr(i, 1)) = tmp
        Else
            dict.Item(arr(i, 1)) = Array(i)
        End If
    Next i

    'optionally remove all non-duplicates
    For Each tmp In dict.Keys
        If UBound(dict.Item(tmp)) = 0 Then dict.Remove tmp
    Next tmp

    'debug.print the duplicates and row numbers
    For Each tmp In dict.Keys
        Debug.Print tmp & " in rows " & Join(dict.Item(tmp), ", ")
    Next tmp

End Sub

结果:

005610 in rows 6, 20
004702 in rows 10, 14

0
投票

您可以修改以下内容并尝试:

Option Explicit

Sub test()

    Dim LastrowS1 As Long, LastrowS2 As Long, Times As Long, i As Long
    Dim rng As Range, rngFound As Range
    Dim str As String

    'Find the last row of column A
    LastrowS1 = Sheet1.Cells(Sheet1.Rows.Count, "A").End(xlUp).Row

    'Start Loop from the lastrow to row 1 upside down
    For i = 2 To LastrowS1
        'Give value to str
        str = Sheet1.Range("A" & i).Value
        'Find the last row of column A
        LastrowS2 = Sheet2.Cells(Sheet2.Rows.Count, "A").End(xlUp).Row
        'Set the range you want to search
        Set rng = Sheet2.Range("A2:A" & LastrowS2)
        'Count how many times str appears in rng
        Times = Application.WorksheetFunction.CountIf(rng, str)
        'If it is appears more that one time
        If Times > 0 Then
            Set rngFound = rng.Find(str)
            Sheet2.Cells(rngFound.Row, 2).Value = Sheet2.Cells(rngFound.Row, 2).Value & ", " & "Row" & " " & i
        Else
             Sheet2.Range("A" & LastrowS2 + 1).Value = str
             Sheet2.Range("B" & LastrowS2 + 1).Value = "Row" & " " & i
        End If

    Next i

End Sub

Sheet 1:

enter image description here工作表2:

enter image description here


0
投票

下面的代码在工作表的右侧添加一列,并将行号写入其中。然后,它对列R中的数字字符串进行排序,从而在连续的行中加入重复项。然后检查如此更改的数字列是否存在连续的重复项,并将其行号(在步骤1中创建)记录在右侧添加的另一列中。最后,将数据按行号排序,恢复原始序列,并删除具有行号的列。标记有重复项的列保留在右侧。只有第一个出现的列表中存在重复的所有行号的列表。

要进行测试,请运行过程FindDuplicates。请注意,可能必须重置代码顶部的两个枚举。在我的试验中,NwsFirstDataRow为3(自上而下的第三行)。您的数据可能从第2行开始。包含数字的列设置为18(R列,从A = 1开始计数)。您可以指定任何其他列。

Option Explicit

Enum Nws                                ' Worksheet navigation
    ' 04 Mar 2019
    NwsFirstDataRow = 3                 ' assuming 2 caption rows above the data
    NwsNumber = 18                      ' column R would be 18
End Enum

Sub FindDuplicates()
    ' 04 Mar 2019

    Dim Ws As Worksheet
    Dim Rng As Range
    Dim FreeClm As Long
    Dim R As Long

    ' modify workbook definition and worksheet name as appropriate
    Set Ws = ActiveWorkbook.Worksheets("Duplicates")
    With Ws
        Set Rng = .Range(.Cells(NwsFirstDataRow, NwsNumber), _
                         .Cells(.Rows.Count, NwsNumber).End(xlUp))
        With .UsedRange
            FreeClm = .Columns.Count + .Column
        End With
        Application.ScreenUpdating = False
        WriteRowNumbers Rng, FreeClm
        SortNumbers Ws, Rng, FreeClm
        MarkDuplicates Ws, Rng, FreeClm
        SortNumbers Ws, Rng.Offset(0, FreeClm - NwsNumber), FreeClm + 1
        .Columns(FreeClm).Delete
        Application.ScreenUpdating = True
    End With
End Sub

Private Sub WriteRowNumbers(Rng As Range, _
                            C As Long)
    ' 04 Mar 2019

    Dim Arr As Variant
    Dim R As Long

    ReDim Arr(1 To Rng.Rows.Count)
    For R = 1 To UBound(Arr)
        Arr(R) = Rng.Cells(R).Row
    Next R
    Rng.Offset(0, C - NwsNumber).Value = Application.Transpose(Arr)
End Sub

Private Sub SortNumbers(Ws As Worksheet, _
                        Rng As Range, _
                        C As Long)
    ' 04 Mar 2019

    Dim SortRng As Range

    With Ws
        Set SortRng = .Range(.Cells(NwsFirstDataRow, 1), _
                             .Cells(NwsFirstDataRow + Rng.Rows.Count - 1, C))
    End With
    With Ws.Sort.SortFields
        .Clear
        .Add Key:=Rng, _
             SortOn:=xlSortOnValues, _
             Order:=xlAscending, _
             DataOption:=xlSortTextAsNumbers
    End With
    With Ws.Sort
        .SetRange SortRng
        .Header = False
        .MatchCase = False
        .Orientation = xlTopToBottom
        .SortMethod = xlPinYin
        .Apply
    End With
End Sub

Private Sub MarkDuplicates(Ws As Worksheet, _
                           Rng As Range, _
                           C As Long)
    ' 04 Mar 2019

    Dim Spike As String
    Dim Arr As Variant
    Dim PrevNum As String, Rt As Long
    Dim R As Long

    Arr = Rng.Value
    For R = 1 To UBound(Arr)
        If Arr(R, 1) = PrevNum Then
            Spike = Spike & ", " & Ws.Cells(R + NwsFirstDataRow - 1, C).Value
        Else
            If InStr(Spike, ",") Then Ws.Cells(Rt, C + 1).Value = Spike
            Rt = R + NwsFirstDataRow - 1
            Spike = Ws.Cells(Rt, C).Value
        End If
        PrevNum = Arr(R, 1)
    Next R
End Sub
© www.soinside.com 2019 - 2024. All rights reserved.