我有多个Word文档文件,每个文件都包含如下所示的文字记录(段落标记未显示):
Some Title1 ' <--- Some title ending with paragraph mark
(Apr 3, 2023 - 9:00am) ' <--- Date - time ending with paragraph mark
' <--- blank line ending with paragraph mark
(00:00:00 - 00:00:02) ' <--- timestamp ending with paragraph mark
Harry: Okay, thank you. ' <--- Speaker: Text ending with paragraph mark
(00:00:02 - 00:00:06)
Tom: Hi, Harry, hello. Are you okay?
(00:00:06 - 00:00:09)
Harry: Yeah, I'm good, thank you. How are you doing? Happy Monday to you.
(00:00:09 - 00:00:12)
Tom: It's a nice Monday today, so it's quite bright for a change.
由于有很多doc文件,我想将每个doc文件的全部内容(所有段落)复制到excel表中
Sheet2
,将每个内容附加到最后一个非空白行。完成后,我想使用 Excel 中的 TextToColumns 功能将文本拆分为单独的列,如下所示:
标题 | 日期时间 | 时间戳 | 扬声器 | 文字 |
---|---|---|---|---|
一些标题1 | (2023 年 4 月 3 日 - 上午 9:00) | (00:00:00 - 00:00:02) | 哈利 | 好的,谢谢。 |
(00:00:02 - 00:00:06) | 汤姆 | 嗨,哈利,你好。你还好吗? | ||
(00:00:06 - 00:00:09) | 哈利 | 是的,我很好,谢谢。你好吗?祝你星期一快乐。 | ||
(00:00:09 - 00:00:12) | 汤姆 | 今天是一个美好的星期一,所以天气很适合改变。 | ||
一些标题2 | (2023年4月5日 - 19:00pm) | (00:00:00 - 00:00:04) | 吉尔 | 我过得很好。 |
(00:00:04 - 00:00:06) | 杰克 | 嗨,吉尔,你有空吗? | ||
(00:00:06 - 00:00:12) | 吉尔 | 我很忙。 | ||
(00:00:12 - 00:00:23) | 杰克 | 好的,改天见吧。 |
...
目前我只能循环并将文档内容复制粘贴到工作表中。一旦合并到工作表中,我想将此内容转置到表格中,如上所示。另外,如果有一种方法可以将所有文档内容收集到数组或 ado 记录集中,然后将数组/记录集内容直接传输到工作表中,这将加快代码速度并节省一些时间。
Option Explicit
Sub ParseTranscriptToExcelSheet()
Application.ScreenUpdating = False
Application.DisplayAlerts = False
Dim wdApp As Object ' Word application
Dim wdDoc As Object ' Word document
Dim tbl As Object ' Table
Dim para As Object ' Paragraph
Dim row As Integer ' Row index for the table
Dim i As Long
Dim oFileDialog As FileDialog
Dim vSelectedItem As Variant
' declare worksheets
Dim ws1 As Worksheet
Dim ws2 As Worksheet
Set ws1 = ThisWorkbook.Sheets(1) ' contains button to run code
Set ws2 = ThisWorkbook.Sheets(2)
' Add a header row to the worksheet 2
ws2.Range("A1:E1").Value = Array("Title", "DateTime", "Timestamp", "Speaker", "Text")
' Initialize the row index for the table
row = ws2.Cells(ws2.Rows.Count, 1).End(xlUp).row + 1
' Open the Word document containing the transcript
On Error Resume Next
Set wdApp = GetObject(, "Word.Application")
On Error GoTo 0
If wdApp Is Nothing Then
Set wdApp = CreateObject("Word.Application")
End If
With wdApp
.Visible = False
End With
' ReDim sContent(1 To 1)
Set oFileDialog = Application.FileDialog(msoFileDialogFilePicker)
With oFileDialog
.Title = "Select Word Files"
.AllowMultiSelect = True
.Filters.Add "Word files", "*.doc*", 1
If .Show = -1 Then
ws2.Activate
For Each vSelectedItem In .SelectedItems
Set wdDoc = wdApp.Documents.Open(vSelectedItem)
With wdDoc
' sContent(UBound(sContent)) = .Content.formattedtext.text
.Content.Copy
ws2.Cells(row, 1).PasteSpecial (xlPasteValues)
Application.CutCopyMode = False
DoEvents
.Close savechanges:=False
row = ws2.Cells(ws2.Rows.Count, 1).End(xlUp).row + 1
End With
' ReDim Preserve sContent(1 To UBound(sContent) + 1) As String
Next vSelectedItem
' ReDim Preserve sContent(1 To UBound(sContent) - 1) As String
Else
MsgBox "No files selected"
End If
End With
wdApp.Quit
Set wdDoc = Nothing
Set wdApp = Nothing
Set ws1 = Nothing
Set ws2 = Nothing
Application.DisplayAlerts = True
Application.ScreenUpdating = True
End Sub
试试这个:
Option Explicit
Sub ParseTranscriptToExcelSheet()
Dim wdApp As Object ' Word application
Dim wdDoc As Object ' Word document
Dim allFiles As Collection, f, txt As String, arr, el, ub As Long
Dim ws2 As Worksheet, nextRow As Long, x As Long, arr2, ln
Set allFiles = SelectedFiles()
If allFiles.Count = 0 Then Exit Sub
Set ws2 = ThisWorkbook.Sheets(2)
ws2.Range("A1:E1").Value = Array("Title", "DateTime", "Timestamp", "Speaker", "Text")
nextRow = ws2.Cells(ws2.Rows.Count, 1).End(xlUp).row + 1
Set wdApp = GetWordApp()
For Each f In allFiles 'loop over selected files
Set wdDoc = wdApp.Documents.Open(f) 'open files
txt = wdDoc.Range.Text 'read content
wdDoc.Close False
arr = Split(txt, vbCr) 'get array of lines/paras
ub = UBound(arr)
If ub > 0 Then
ws2.Cells(nextRow, "A").Value = arr(0) 'fill the "header" info
ws2.Cells(nextRow, "B").Value = arr(1)
For x = 2 To UBound(arr) 'process rest of lines
ln = Trim(arr(x))
If ln Like "(*)" Then 'timestamp?
ws2.Cells(nextRow, "C").Value = ln
ElseIf ln Like "*:*" Then 'speaker text?
arr2 = Split(ln, ":", 2)
ws2.Cells(nextRow, "D").Value = arr2(0) 'speaker
ws2.Cells(nextRow, "E").Value = arr2(1) 'content
nextRow = nextRow + 1
End If
Next x
End If
nextRow = nextRow + 1
Next f
End Sub
'return a Collection of user-selected Word files
Function SelectedFiles() As Collection
Dim f
Set SelectedFiles = New Collection
With Application.FileDialog(msoFileDialogFilePicker)
.Title = "Select one or more Word Files"
.AllowMultiSelect = True
.Filters.Add "Word files", "*.doc*", 1
If .Show = -1 Then
For Each f In .SelectedItems
SelectedFiles.Add f
Next f
End If
End With
End Function
'Get a running Word instance, or start a new instance
Function GetWordApp() As Object
On Error Resume Next
Set GetWordApp = GetObject(, "Word.Application")
On Error GoTo 0
If GetWordApp Is Nothing Then
Set GetWordApp = CreateObject("Word.Application") 'assuming this works ok...
End If
GetWordApp.Visible = True
End Function