使用批处理脚本从docx文件中读取文本内容

问题描述 投票:0回答:1

我能够在 Windows 批处理脚本中读取 txt 文件的文本内容,但是找不到从 docx 文件读取数据的方法。我该怎么办?

batch-file file-conversion
1个回答
0
投票

将 docx2txt.cmd 放在要转换为 txt 的 File.docx 所在的同一文件夹中,然后运行:

docx2txt.cmd file.docx

或使用:

拖放(Drag and Drop):把 .docx 文件拖放到 docx2txt.cmd 上

结果会在记事本中打开生成的 txt 文件,该文件与 .docx 文件同名,只是扩展名为 .txt。

  • docx2txt.cmd 已更新
 @echo off & setlocal enabledelayedexpansion

rem docx2txt.cmd - start-up section.
rem Takes one argument, the path of a .docx file.
rem 1. Pipes the quoted argument into findstr; the pattern looks intended to
rem    accept only names ending in .docx - TODO confirm the pattern.
rem 2. Stores the argument's drive, path and base name plus .txt in _Docx2txt
rem    and copies the matching .docx to Docx.zip in the temp folder.
rem 3. If Docx.zip does not exist afterwards, sets _Msg_Err and jumps to _error_.
rem 4. Defines the working paths, the _replace_00 to _replace_03 search strings
rem    and one CScript command prefix per helper .vbs file.
rem 5. Sets _Drop_Lines with a caret continuation; the remark in that line says
rem    the blank line after it must be kept. The resulting value is presumably
rem    a line break or empty text - TODO confirm.
rem If the first group fails, the _error_ branch clears the screen, switches the
rem console colour to F4, shows _Msg_Err in a PowerShell Wscript.Shell popup,
rem prints a usage line, waits on timeout /t -1, restores colour 0A and ends the
rem script with goto :eof.
((
echo/"%~1"| findstr /lic:["\.docx\""$] >nul) && (
for /f "tokens=* delims= " %%i in ('echo/%~1') do (
set "_Docx2txt=%%~dpni.txt" && >nul copy /y "!_Docx2txt:~0,-4!.docx" "%temp%\Docx.zip") 
if /i not exist "%temp%\Docx.zip" set "_Msg_Err=%~1 not valid^!" && goto :_error_:
set "_Docx_zDir_=%temp%\Docx_Zip"
set "_Docx_uZip_=%temp%\Docx.zip"
set "_replace_00=openxmlformats"
set "_replace_01=urn:schemas-microsoft-com:vml"
set "_replace_02=urn:schemas-microsoft-com:office:word"
set "_replace_03=urn:schemas-microsoft-com:office:office"
set _Run_CScript="%Windir%\System32\CScript.exe" //nologo
set _Break_line=!_Run_CScript! "%temp%\Break_line.vbs"
set _Replc_Qute=!_Run_CScript! "%temp%\Replc_Qute.vbs"
set _Replc_Dots=!_Run_CScript! "%temp%\Replc_Dots.vbs"
set _Replc_Tag1=!_Run_CScript! "%temp%\Replc_Tag1.vbs"
set _Replc_Tag2=!_Run_CScript! "%temp%\Replc_Tag2.vbs"
set _Find_Replc=!_Run_CScript! "%temp%\Find_Rep.vbs"
set _UnZip_Docx=!_Run_CScript! "%temp%\UnZip.vbs"
set _Drop_Lines=^

<nul & rem .:| This blank line is needed to do this job! So, do not remove it! |:.
) 2>nul || (
:_error_:
cls & echo/ & color F4 & set "_Arg_PS=New-Object -ComObject Wscript.Shell" & echo/ 
set "_Arg_Err=Valid: Some_Document.Docx" & set "_Err=E R R O R ^! Argumment missing: " 
if not defined _Msg_Err set _Msg_Err="%~0" [+ !_Arg_Err!]& echo/ Well, something is really wrong^^!
echo/ & powershell ^(!_Arg_PS!^).Popup^("""!_Msg_Err!""",0,"""!_Err!""",0x10^) 2>nul >nul
echo/ Use: !_Msg_Err! & timeout /t -1 2>nul >nul & color 0A & goto :eof
)) 2>nul

rem ---- Main conversion pipeline ----
rem Reads word\document.xml from the unzipped .docx, strips markup with a series
rem of find-and-replace passes, keeps the lines carrying the 1#2#@3#4# marker,
rem writes the result to the .txt file named in _Docx2txt and opens it in Notepad.
rem
rem NOTE(review): Find_Rep.vbs takes its search text straight from its second
rem argument, so the chr expressions passed below look like they arrive as
rem literal text rather than being evaluated - confirm before relying on them.

rem Create the helper scripts and start with an empty output text file.
call :_write_vbs_files_: & type nul >"!_Docx2txt!"

rem Recreate the scratch folder that receives the unzipped document.
(rmdir /q /s "!_Docx_zDir_!" & ping 127.1 -n 1 >nul && mkdir "!_Docx_zDir_!" || mkdir "!_Docx_zDir_!") 2>nul >nul

rem Unzip Docx.zip into the scratch folder and create two empty work files.
!_UnZip_Docx! && type nul >"!_Docx_zDir_!\Docx_Text.txt" & type nul >"!_Docx_zDir_!\Docx.tmp"

rem All markup stripping is done on a working copy of the document body.
copy /y "!_Docx_zDir_!\word\document.xml" "!_Docx_zDir_!\Docx.tmp" >nul

rem Replace a list of tag fragments with the _Drop_Lines value.
for %%r in ("/^>^</","^<w:t xml:space^=","/^>","^</a:^","w:rPr^>","^<w:r","^</w:t^>","xmlns:","^<w:instrText", "^</w:instrText"^
 ) do !_Find_Replc! "!_Docx_zDir_!\Docx.tmp" %%r "!_Drop_Lines!"

rem Replace the namespace strings held in _replace_00 to _replace_03.
for /l %%l in (0 1 3) do !_Find_Replc! "!_Docx_zDir_!\Docx.tmp" "!_replace_0%%l!" "!_Drop_Lines!"

rem Turn text-run openings into the 1#2#@3#4# marker.
!_Replc_Tag1! "!_Docx_zDir_!\Docx.tmp"
!_Replc_Tag2! "!_Docx_zDir_!\Docx.tmp"
!_Replc_Qute! "!_Docx_zDir_!\Docx.tmp"
!_Find_Replc! "!_Docx_zDir_!\Docx.tmp" chr^(47^)^&"schemas"^&chr^(46^)^&"microsoft"^&chr^(46^)^&"com"^&chr^(47^)^&^chr^(47^) "!_Drop_Lines!"
!_Find_Replc! "!_Docx_zDir_!\Docx.tmp" chr^(47^)^&"schemas"^&chr^(46^)^&"openxmlformats"^&chr^(46^) "!_Drop_Lines!"
!_Find_Replc! "!_Docx_zDir_!\Docx.tmp" chr^(46^)^&"org"^&^chr^(47^)^&^chr^(47^) "!_Drop_Lines!"
!_Find_Replc! "!_Docx_zDir_!\Docx.tmp" "^<w:t^>" "1#2#@3#4#"
!_Find_Replc! "!_Docx_zDir_!\Docx.tmp" "^<a:t^>" "1#2#@3#4#"
!_Find_Replc! "!_Docx_zDir_!\Docx.tmp" chr^(32^)^&"1#2#@3#4#" "1#2#@3#4#"
rem Fixed: char corrected to chr so this line matches the one above it.
!_Find_Replc! "!_Docx_zDir_!\Docx.tmp" "1#2#@3#4#"^&chr^(32^) "1#2#@3#4#"

!_Find_Replc! "!_Docx_zDir_!\Docx.tmp" "^>^<" "!_Drop_Lines!"
!_Replc_Dots! "!_Docx_zDir_!\Docx.tmp"

rem Replace leftover schema and namespace words.
for %%r in (org/officeDocument,org/officeDocument/,org/,relationships,wordprocessingDrawing,http://schemas.,^
presentationml,spreadsheetDrawing,speadsheetDrawing,wordprocessingml,drawingml,wordprocessingShape^
 ) do !_Find_Replc! "!_Docx_zDir_!\Docx.tmp" %%r "!_Drop_Lines!"

for %%r in (chr^(60^)^&chr^(63^),chr^(63^)^&chr^(62^),chr^(9^)) do !_Find_Replc! "!_Docx_zDir_!\Docx.tmp" %%r "!_Drop_Lines!"

rem Keep only the lines that carry the marker, then remove the marker itself.
type "!_Docx_zDir_!\Docx.tmp"|findstr "1#2#@3#4#">>"!_Docx_zDir_!\Docx_Text.txt"

!_Find_Replc! "!_Docx_zDir_!\Docx_Text.txt" "1#2#@3#4#" "!_Drop_Lines!"

rem Break the text after every full stop.
!_Break_line! "!_Docx_zDir_!\Docx_Text.txt"

!_Find_Replc! "!_Docx_zDir_!\Docx_Text.txt" chr^(32^)^&chr^(46^) "!_Drop_Lines!"
!_Find_Replc! "!_Docx_zDir_!\Docx_Text.txt" chr^(32^)^&chr^(32^)^&chr^(46^) "!_Drop_Lines!"

rem Skip the first three lines, keep lines containing a space, drop lines that
rem start with a space and a full stop, and append the rest to the output file.
type "!_Docx_zDir_!\Docx_Text.txt"| more /e /s /p +3 | findstr /rc:"[\ ]" | findstr /vb "\ \." >>"!_Docx2txt!"

rem Fixed: the variable name was misspelled as _Droplines and the closing quote
rem was missing, which left the third argument unterminated.
rem These two passes touch only the intermediate file, which is removed below.
!_Find_Replc! "!_Docx_zDir_!\Docx_Text.txt" "xml"^&chr^(58^) "!_Drop_Lines!"
!_Find_Replc! "!_Docx_zDir_!\Docx_Text.txt" "space"^&chr^(61^) "!_Drop_Lines!"

rem Show the result and remove the scratch folder.
start /b notepad.exe "!_Docx2txt!" && rmdir /q /s "!_Docx_zDir_!" 2>nul >nul

rem Delete the generated helper scripts and the zip copy, then skip past the
rem subroutine to the end of the file.
>nul (for %%D in (Find_Rep Replc_Tag1 Replc_Tag2 Replc_Qute Replc_Dots Break_Line UnZip) do del /q /f "%temp%\%%D.vbs"
del /q /f "%temp%\docx.zip") & goto :_end_of_file_:

:_write_vbs_files_:

rem Subroutine: writes the seven helper VBScript files into the temp folder and
rem returns to the caller with exit /b. Each file is produced by redirecting a
rem parenthesised group of echo lines; carets escape the characters that cmd
rem would otherwise interpret.

rem Find_Rep.vbs - arguments: 0 file name, 1 text to find, 2 replacement text.
rem Reads the whole file, replaces every occurrence and writes the file back.
rem Fixed: the read handle is now closed before the file is reopened for
rem writing, as every other helper script here already does.
>"%temp%\Find_Rep.vbs"^
    (
     echo/ Const ForReading = 1
     echo/ Const ForWriting = 2
     echo/ strFileName = WScript.Arguments^(0^)
     echo/ strOldText = WScript.Arguments^(1^)
     echo/ strNewText = WScript.Arguments^(2^)
     echo/ Set objFSO = CreateObject^("Scripting.FileSystemObject"^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForReading,ForReading^)
     echo/ strText = objFile.ReadAll
     echo/ objFile.Close
     echo/ strNewText = Replace^(strText, strOldText, strNewText^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForWriting^)
     echo/ objFile.Write strNewText  'WriteLine adds extra CR/LF
     echo/ objFile.Close
    )

rem Replc_Tag1.vbs - argument 0: file name.
rem Replaces each plain w:t opening tag with a line break followed by the
rem quoted word preserve and a closing angle bracket, the same text that
rem Replc_Tag2.vbs then turns into the marker.
rem Fixed: the search text was built from chr 87 and chr 84, upper case W and T,
rem which the case-sensitive Replace never finds in lower case w:t tags; it now
rem uses chr 119 and chr 116. The replacement used the unquoted, undefined name
rem preserv; it is now the quoted text preserve.
>"%temp%\Replc_Tag1.vbs"^
    (
     echo/ Const ForReading = 1
     echo/ Const ForWriting = 2
     echo/ strFileName = Wscript.Arguments^(0^)
     echo/ strOldText = ^(chr^(60^)^&chr^(119^)^&chr^(58^)^&chr^(116^)^&chr^(62^)^)
     echo/ strNewText = ^(vbCr^&vbLf^&chr^(34^)^&"preserve"^&chr^(34^)^&chr^(62^)^)
     echo/ Set objFSO = CreateObject^("Scripting.FileSystemObject"^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForReading,ForReading^)
     echo/ strText = objFile.ReadAll
     echo/ objFile.Close
     echo/ strNewText = Replace^(strText, strOldText, strNewText^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForWriting^)
     echo/ objFile.Write strNewText  'WriteLine adds extra CR/LF
     echo/ objFile.Close
    )

rem Replc_Tag2.vbs - argument 0: file name.
rem Replaces the quoted word preserve plus closing angle bracket with the
rem marker 1#2#@3#4#.
>"%temp%\Replc_Tag2.vbs"^
    (
     echo/ Const ForReading = 1
     echo/ Const ForWriting = 2
     echo/ strFileName = Wscript.Arguments^(0^)
     echo/ strOldText = ^(chr^(34^)^&"preserve"^&chr^(34^)^&chr^(62^)^)
     echo/ strNewText = ^("1"^&chr^(35^)^&"2"^&chr^(35^)^&chr^(64^)^&"3"^&chr^(35^)^&"4"^&chr^(35^)^)
     echo/ Set objFSO = CreateObject^("Scripting.FileSystemObject"^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForReading,ForReading^)
     echo/ strText = objFile.ReadAll
     echo/ objFile.Close
     echo/ strNewText = Replace^(strText, strOldText, strNewText^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForWriting^)
     echo/ objFile.Write strNewText  'WriteLine adds extra CR/LF
     echo/ objFile.Close
    )

rem Replc_Qute.vbs - argument 0: file name.
rem NOTE(review): the generated script is identical to Replc_Tag2.vbs, so when
rem it runs right after Replc_Tag2.vbs it has nothing left to replace. The name
rem suggests a different search text was intended - confirm with the author.
>"%temp%\Replc_Qute.vbs"^
    (
     echo/ Const ForReading = 1
     echo/ Const ForWriting = 2
     echo/ strFileName = Wscript.Arguments^(0^)
     echo/ strOldText = ^(chr^(34^)^&"preserve"^&chr^(34^)^&chr^(62^)^)
     echo/ strNewText = ^("1"^&chr^(35^)^&"2"^&chr^(35^)^&chr^(64^)^&"3"^&chr^(35^)^&"4"^&chr^(35^)^)
     echo/ Set objFSO = CreateObject^("Scripting.FileSystemObject"^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForReading,ForReading^)
     echo/ strText = objFile.ReadAll
     echo/ objFile.Close
     echo/ strNewText = Replace^(strText, strOldText, strNewText^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForWriting^)
     echo/ objFile.Write strNewText  'WriteLine adds extra CR/LF
     echo/ objFile.Close
    )

rem Replc_Dots.vbs - argument 0: file name.
rem Replaces a full stop followed by character code 0 with a full stop, a
rem carriage return and the marker 1#2#@3#4#.
>"%temp%\Replc_Dots.vbs"^
    (
     echo/ Const ForReading = 1
     echo/ Const ForWriting = 2
     echo/ strFileName = Wscript.Arguments^(0^)
     echo/ strOldText = ^(chr^(46^)^&^chr^(0^)^)            
     echo/ strNewText = ^(chr^(46^)^&chr^(13^)^&"1"^&chr^(35^)^&"2"^&chr^(35^)^&chr^(64^)^&"3"^&chr^(35^)^&"4"^&chr^(35^)^)
     echo/ Set objFSO = CreateObject^("Scripting.FileSystemObject"^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForReading,ForReading^)
     echo/ strText = objFile.ReadAll
     echo/ objFile.Close
     echo/ strNewText = Replace^(strText, strOldText, strNewText^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForWriting^)
     echo/ objFile.Write strNewText  'WriteLine adds extra CR/LF
     echo/ objFile.Close
    )

rem Break_Line.vbs - argument 0: file name.
rem Replaces every full stop with a full stop followed by CR and LF.
>"%temp%\Break_Line.vbs"^
    (
     echo/ Const ForReading = 1
     echo/ Const ForWriting = 2
     echo/ strFileName = Wscript.Arguments^(0^)
     echo/ strOldText = ^(chr^(46^)^)            
     echo/ strNewText = ^(chr^(46^)^&vbCr^&vbLf^)
     echo/ Set objFSO = CreateObject^("Scripting.FileSystemObject"^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForReading,ForReading^)
     echo/ strText = objFile.ReadAll
     echo/ objFile.Close
     echo/ strNewText = Replace^(strText, strOldText, strNewText^)
     echo/ Set objFile = objFSO.OpenTextFile^(strFileName, ForWriting^)
     echo/ objFile.Write strNewText  'WriteLine adds extra CR/LF
     echo/ objFile.Close
    )

rem UnZip.vbs - takes no arguments; the zip path and the target folder are
rem expanded from _Docx_uZip_ and _Docx_zDir_ when the file is written.
rem Copies every item of the zip into the target folder via Shell.Application.
rem The FileSystemObject the original created here was never used and has been
rem dropped.
>"%temp%\UnZip.vbs"^
    (
     echo/ ZipFile="!_Docx_uZip_!"
     echo/ ExtractTo="!_Docx_zDir_!\"
     echo/ set objShell = CreateObject^("Shell.Application"^)
     echo/ set FilesInZip=objShell.NameSpace^(ZipFile^).items
     echo/ objShell.NameSpace^(ExtractTo^).CopyHere^(FilesInZip^)
     echo/ Set objShell = Nothing
    )

exit /b

rem Jump target used by the main pipeline to skip over the subroutine above.
:_end_of_file_:
    
© www.soinside.com 2019 - 2024. All rights reserved.