我有工作代码,可以查看单元格 C3:C 的值,然后尝试在文件中查找匹配的字符串(从最新到最旧,扩展名为 .txt)。
我想加快处理时间以循环遍历文件夹中的 20,000 个文件。
看起来代码运行每个循环时默认按名称升序排序。
我希望它按优先级顺序执行此操作:
所有 .txt 文件加起来约占 20,000 个文件的 10%。
' ======================= Sorting Algorithm ======================================
fileDate = 0
For fileDate = 0 To 20 'days back to search
file = Dir(folderPath & "*.txt")
For Each file In fs.getfolder(folderPath).files
SRName = Replace(Left(Mid(file, 35), 6), "_", "") 'works if all names are 1234XY, trims underscores
If file.datelastmodified >= Now - fileDate And LCase(Right(file.Name, 4)) = ".prl" Then ' FILE MATCH
' =========== Part List Prep Array ============
For Each cell In Range("C3:C" & LastRow) ' CELLS NAME MATCH TO CHECK IN FILE
ToTestIndex = cell.Row - 2
Homework = "PNL1," & cell
If PartToTestArray(ToTestIndex) <> True And InStr(1, Homework, SRName, vbTextCompare) > 0 Then
If InStr(1, Homework, SRName, vbTextCompare) > 0 Then
Cells(cell.Row, 6).Value = "Searching in " & "'" & Mid(file, 35, Len(file) - 38) & "'"
PartToTestArray(ToTestIndex) = True
Cells(cell.Row, 7).Value = file.datelastmodified
End If
End If
Next cell
' =========== Part List Prep Array ============
fileNumber = FreeFile ' Read the content of the file
Open file For Input As fileNumber
fileContent = Input$(LOF(fileNumber), fileNumber)
Close fileNumber
lines = Split(fileContent, vbCrLf) ' Split the content into an array of lines
For i = 4 To UBound(lines) - 1 Step 3 ' new
For Each cell In Range("C3:C" & LastRow) ' Cells matching file testing
FoundIndex = cell.Row - 2
ToTestIndex = cell.Row - 2
If FoundPartArray(FoundIndex) <> True And PartToTestArray(ToTestIndex) = True Then
Homework = "PNL1," & cell
If lines(i) = "PNL4,0=" Then 'voodoo adjustment
i = i + 1
End If
If InStr(1, lines(i), Homework, vbTextCompare) > 0 Then ' Check if the line contains the Part Number
For QtySearch = 1 To 20 ' very small factor on the amount of total time for process
PartMultiplier = Cells(cell.Row, cell.Column + 2).Value
If InStr(1, lines(i), "0,0,0,0,0000,0", vbTextCompare) > 0 Then
PartMultiplier = PartMultiplier & ",0,0,0,0000,0"
ShotInTheDark = "0,0,0,0,0000,0"
Else
ShotInTheDark = "," & QtySearch & ",0,"
PartMultiplier = "," & PartMultiplier & ",0,"
End If
If InStr(1, lines(i), ShotInTheDark, vbTextCompare) > 0 Then
lines(i) = Replace(lines(i), ShotInTheDark, PartMultiplier)
Exit For ' Exiting QtySearch
End If
Next QtySearch
AddPart = lines(i) & vbCrLf & lines(i + 1) & vbCrLf & lines(i + 2) & vbCrLf ' Concatenate the current, previous, and next lines
FoundPartArray(FoundIndex) = True ' PART IS FOUND
Cells(cell.Row, 6).Value = Mid(file, 35, Len(file) - 38)
Cells(cell.Row, 7).Value = file.datelastmodified
cell.Interior.Color = RGB(0, 255, 0) ' green
outputContent = outputContent & AddPart
Exit For ' cell exit
End If
End If
Next cell ' cell row increment
Next i ' row in prl file increment
file = Dir ' Get the next file in the folder
End If
Next file ' next file in folder increment
AllPartsFound = True
For FoundIndex = LBound(FoundPartArray) To UBound(FoundPartArray)
If Not FoundPartArray(FoundIndex) Then
AllPartsFound = False
Exit For
End If
Next FoundIndex
If AllPartsFound = True Then
fileDate = 20
End If
Next fileDate
' ======================= Sorting Algorithm ======================================
我尝试过:
A. 组合/拆分 If/then 语句(例如“datemodified 和 .txt 扩展名”),看看哪种方式最有效。
B. 浏览文件中的所有单元格与获取一个单元格并在移动到下一个单元格之前浏览每个文件。
我让它工作的最快速度是每个文件 0.007 秒。这是一个 2 分钟以上的过程。
是否有更快的方法,例如先将所有 20,000 个文件添加到数组中?
我考虑添加一个可以使用 Windows 任务计划程序编写脚本的子文件夹,每天更新 2-3 次,仅复制 .txt 文件,从而将搜索范围缩小 90%。
这可能会让您开始采用不同的方法。
为了进行测试,我在一个文件夹中创建了 10k txt 文件,并为它们分配了最近 50 天内的随机上次修改日期。
下面的代码对它们进行一次循环,并根据年龄(以天为单位)收集符合条件的文件,并将其放入一组集合中。
整个收集过程大约需要 4 秒(这可能会收集比您要处理的文件多得多的文件,因为我所有的 10k 文件都具有相同的扩展名,并且与您的实际用例相比,时间可能有限)。
Sub Tester()
Const FPATH As String = "C:\Temp\VBA\Folder1\"
Dim f, t, dt, ns As Object, pth As String, oFile As Object
Dim days(1 To 20) As Collection, d As Long, lmdays
'Code below was used to create some dummy files and set random last-modified date
' Set ns = CreateObject("Shell.Application").Namespace(FPATH)
' For i = 1 To 10000
' PutContent FPATH & "File_" & Format(i, "000000") & ".txt", "testing " & i
' Set oFile = ns.ParseName("File_" & Format(i, "000000") & ".txt")
' oFile.ModifyDate = Date - (Rnd() * 50) 'last modified within 50 days
' Next i
For d = 1 To 20
Set days(d) = New Collection 'initialize array of collections
Next d
t = Timer
f = Dir(FPATH & "*.txt")
Do While Len(f) > 0
dt = FileDateTime(FPATH & f)
lmdays = Application.Ceiling(Now - dt, 1) 'last mod age in whole days
If lmdays <= 20 Then 'within our window?
days(lmdays).Add FPATH & f 'collect this file
End If
f = Dir()
Loop
'check how many files for each day
For d = 1 To 20
Debug.Print d, days(d).count
'## Process the files from this Collection... ##
Next d
Debug.Print Timer - t
End Sub
'used when creating test files
Sub PutContent(f As String, content As String)
CreateObject("scripting.filesystemobject"). _
OpenTextFile(f, 2, True).Write content
End Sub