VBA宏中负向后行断言(negative lookbehind)的替代方案

问题描述 投票:0回答:1

我接手了一个正则表达式(REGEX)模式,它在在线测试工具中似乎可以正常工作(演示见:https://regex101.com/r/KPpoLS/1)。

但是,它使用了负向后行断言(negative lookbehind),以避免匹配到较长序列号中的子字符串。我需要在VBA宏中使用此REGEX模式来识别所有序列号模式(在下面列出),但据我所知,VBA不支持负向后行断言。

不幸的是,我对REGEX并不熟悉,无法重新设计此REGEX模式-有人可以提供任何建议吗?

\b(?<!-)(\d\/[A-Z]{2}\/\d{6}-\d{2})|([A-Z]-\d\/[A-Z]{2}\/\d{6}-\d{2})|(?<![A-Z])([A-Z]\/[A-Z]{2}\/\d{6}-\d{2})|([A-Z]{2}\/[A-Z]{2}\/\d{6}-\d{2})|([A-Z][0-9]\/[A-Z]{2}\/\d{6}-\d{2})|(?<![A-Z])([A-Z]-[A-Z]{3}\/\d{6}-\d{2})|([0-9]\/[A-Z]{2}\/[A-Z]{3}\/\d{6}-\d{2})|([A-Z]\/[A-Z]{2}\/[A-Z]{3}\/\d{6}-\d{2})|([0-9]\/[A-Z]{2}\/[A-Z]{3}\/\d{4}-\d{2})|([0-9]\/[A-Z]{2}\/[A-Z]{3}\/\d{6}-\d{2})
[list arranged by sub-strings]
  1/AA/111111-11
A-1/AA/111111-11

 A/AA/111111-11
AA/AA/111111-11
A1/AA/111111-11

   A-AAA/111111-11
1/AA/AAA/111111-11
A/AA/AAA/111111-11

1/AA/AAA/1111-11


[original list]
1/AA/111111-11
A/AA/111111-11
A1/AA/111111-11
AA/AA/111111-11
A-AAA/111111-11
1/AA/AAA/1111-11
A-1/AA/111111-11
1/AA/AAA/111111-11
A/AA/AAA/111111-11
Sub regex_test_by_word_story_ranges()
' Collects the text of every story range in the active document, finds all
' Category 1 serial numbers with a VBScript regular expression, and builds a
' five-column array (serial, URL fragment, full URL, two placeholders) that is
' dumped to the Immediate window.
'
' The VBScript RegExp engine does not support lookbehind, so instead of
' (?<!-) / (?<![A-Z]) the pattern consumes the single character in front of
' the serial number in a non-capturing group; the serial itself is then read
' from SubMatches(0) rather than from Match.Value.

Dim stringone As String
Dim regexone As Object
Dim theMatches As Object
Dim oneMatch As Object

Dim doc As Word.Document

Dim x As Long
Dim regcount As Long

Dim rngstory As Word.Range

    Dim serialArray() As String
    Dim serialText As String
    Dim urlPart As String

    ' A serial may start at the beginning of the text or after any character
    ' that is not an upper-case letter, a digit or a hyphen. This guard is
    ' applied to every alternative so that a shorter serial is never matched
    ' inside a longer one.
    Const LEAD_GUARD As String = "(?:^|[^A-Z0-9-])"

    ' One alternative per serial-number shape:
    '   1/AA/111111-11, A-1/AA/111111-11, A/AA/111111-11,
    '   AA/AA/111111-11, A1/AA/111111-11
    '   A-AAA/111111-11
    '   1/AA/AAA/111111-11, A/AA/AAA/111111-11
    '   1/AA/AAA/1111-11
    Const SERIAL_BODY As String = _
        "(?:[A-Z]-\d|[A-Z]{1,2}|[A-Z]\d|\d)/[A-Z]{2}/\d{6}-\d{2}" & _
        "|[A-Z]-[A-Z]{3}/\d{6}-\d{2}" & _
        "|[A-Z0-9]/[A-Z]{2}/[A-Z]{3}/\d{6}-\d{2}" & _
        "|\d/[A-Z]{2}/[A-Z]{3}/\d{4}-\d{2}"

    ' Late binding matches the "As Object" declaration and avoids needing a
    ' project reference to "Microsoft VBScript Regular Expressions 5.5".
    Set regexone = CreateObject("VBScript.RegExp")
    Set doc = ActiveDocument

'=========================================
'Loop #1 to find Category 1 serial numbers
'=========================================

    regexone.Pattern = LEAD_GUARD & "(" & SERIAL_BODY & ")"
    regexone.Global = True
    ' The original assigned IgnoreCase to .Pattern, which wiped the pattern.
    ' Matching stays case-sensitive because the pattern relies on [A-Z].
    regexone.IgnoreCase = False

    ' Reading .Text does not require selecting the range, so no Select call
    ' and no blanket "On Error Resume Next" (which would hide later errors).
    For Each rngstory In doc.StoryRanges
        stringone = stringone & rngstory.Text
    Next rngstory

    Set theMatches = regexone.Execute(stringone)

    regcount = theMatches.Count
    Debug.Print regcount

    If regcount > 0 Then
        ' 1-based in both dimensions so that no empty row 0 / column 0 exists.
        ReDim serialArray(1 To regcount, 1 To 5)
        x = 0
        For Each oneMatch In theMatches
            x = x + 1
            ' SubMatches(0) is the serial without the consumed leading character.
            serialText = oneMatch.SubMatches(0)
            urlPart = Replace(serialText, "/", "!")
            Debug.Print serialText
            serialArray(x, 1) = serialText                      'this will become a search term for another macro
            serialArray(x, 2) = urlPart                         'this will become part of a URL
            serialArray(x, 3) = "www.baseURL.com/" & urlPart    'base URL with (x, 2) appended; the next macro finds the search term and inserts this hyperlink
            serialArray(x, 4) = "Placeholder3"                  'extra, will delete
            serialArray(x, 5) = "Placeholder4"                  'extra, will delete
        Next oneMatch

'checking output of array:
        ' Only dump when the array was allocated; LBound/UBound on an
        ' unallocated dynamic array raises "Subscript out of range".
        For x = LBound(serialArray, 1) To UBound(serialArray, 1)
            Debug.Print serialArray(x, 1) & ", " & serialArray(x, 2) & ", " & serialArray(x, 3) & ", " & serialArray(x, 4) & ", " & serialArray(x, 5)
        Next x
    End If



'=========================================
'Loop #2 to find Category 2 serial numbers
'=========================================
'Same loop as above code, but I have not developed the REGEX for Category 2 serial numbers yet
'Loop #2 needs to add the matches from the Loop #2 REGEX to serialArray()
'This portion is beyond my original question, but I would welcome any help on adding subsequent loop matches to serialArray()

'https://stackoverflow.com/questions/60831517/alternative-to-negative-lookbehinds-in-vba-macro?noredirect=1#comment107630601_60831517

End Sub

regex vba ms-word regex-lookarounds
1个回答
0
投票

这种情况适合组合使用多种工具来得到一个简单的解决方案:让Word通配符搜索做它最擅长的事,再让Word/VBA做它最擅长的事。

从所提供的序列号列表来看,序列号的格式是可变的,这意味着很难定义一个能捕获整个字符串的正则表达式。但是,一旦找到序列号的一部分,我们就可以使用VBA扩展Word范围以涵盖整个序列号。因此,只需搜索序列号中定义明确的那一部分即可。在您的具体情况下,我将其确定为模式“/111111-11”。然后,使用Word/VBA很容易将找到的范围的起点向前扩展,直到遇到不属于序列号的字符为止。

下面的代码可靠地捕获了您帖子中列出的序列号,并返回一个Scripting.Dictionary,其中键是Long型序号,值是与序列号相对应的Word范围(Word.Range)。

Option Explicit

Sub testGetSerialNo()
    ' Driver: asks GetSerialNos for every serial number in the active document
    ' and prints each dictionary key followed by the text of its range to the
    ' Immediate window.

    Dim foundSerials As Scripting.Dictionary
    Dim entryKey As Variant

    Set foundSerials = GetSerialNos(ActiveDocument)

    For Each entryKey In foundSerials.Keys
        Debug.Print entryKey; foundSerials.Item(entryKey).Text
    Next entryKey

End Sub
Public Function GetSerialNos(ByVal ipDoc As Word.Document) As Scripting.Dictionary
    ' Finds every serial number in the main text story of ipDoc.
    '
    ' A Word wildcard search locates the well-defined tail of a serial number
    ' ("/" + digits + "-" + digits, e.g. "/111111-11"); the found range is then
    ' extended backwards until a delimiter character is reached so that it
    ' covers the whole serial number.
    '
    ' Parameters:
    '   ipDoc - the document whose main text story is searched.
    ' Returns:
    '   A Scripting.Dictionary keyed by a zero-based running index (Long) whose
    '   items are the Word.Range objects covering each serial number.

    ' Finds the /111111-11 part of the serial no
    ' NOTE(review): Word wildcard syntax uses the regional list separator
    ' inside {}, so "{1,}" may need to be "{1;}" on some locales - confirm.
    Const SerialNo As String = "(/)([0-9]{1,})(\-)([0-9]{1,})>"
    ' Characters that may occur just before the start of a serial number.
    ' You may need to add more.
    Const Delimiters As String = vbTab & vbCrLf & " ,;"

    Dim mySerialNos As Scripting.Dictionary
    Dim myRange As Word.Range
    Dim storyStart As Long
    Dim storyEnd As Long

    Set mySerialNos = New Scripting.Dictionary

    ' The document is not modified here, so the story bounds stay constant.
    storyStart = ipDoc.StoryRanges(wdMainTextStory).Start
    storyEnd = ipDoc.StoryRanges(wdMainTextStory).End

    With ipDoc.StoryRanges(wdMainTextStory)

        With .Find

            .ClearFormatting
            .Wrap = wdFindStop
            .MatchWildcards = True
            .Text = SerialNo

        End With

        Do While .Find.Execute

            DoEvents
            Set myRange = .Duplicate

            ' MoveStartUntil returns 0 both when the delimiter is directly in
            ' front of the range and when no delimiter exists at all; in the
            ' latter case the range is left unchanged. Tell the two apart by
            ' looking at the preceding character, and fall back to the start
            ' of the story when the serial number is the very first text.
            If myRange.MoveStartUntil(Cset:=Delimiters, Count:=wdBackward) = 0 Then
                If myRange.Start > storyStart Then
                    If InStr(1, Delimiters, ipDoc.Range(myRange.Start - 1, myRange.Start).Text, vbBinaryCompare) = 0 Then
                        myRange.Start = storyStart
                    End If
                End If
            End If

            ' The range is stored directly; selecting it would only move the
            ' user's selection, so no Select call is made.
            mySerialNos.Add mySerialNos.Count, myRange

            ' Continue searching in the remainder of the story.
            .Start = .End + 1
            .End = storyEnd

        Loop

    End With

    Set GetSerialNos = mySerialNos

End Function

我将您的整个帖子复制到Word,上面的代码产生了以下输出

0 1/AA/111111-11
 1 A-1/AA/111111-11
 2 A/AA/111111-11
 3 AA/AA/111111-11
 4 A1/AA/111111-11
 5 A-AAA/111111-11
 6 1/AA/AAA/111111-11
 7 A/AA/AAA/111111-11
 8 1/AA/AAA/1111-11
 9 1/AA/111111-11
 10 A/AA/111111-11
 11 A1/AA/111111-11
 12 AA/AA/111111-11
 13 A-AAA/111111-11
 14 1/AA/AAA/1111-11
 15 A-1/AA/111111-11
 16 1/AA/AAA/111111-11
 17 A/AA/AAA/111111-11
© www.soinside.com 2019 - 2024. All rights reserved.