将 blob 从表导出到单个文件的最快方法

问题描述 投票:0回答:5

将存储在 SQL Server 表中的文件(blob)导出到硬盘驱动器上的文件的最快方法是什么?我有超过 2.5 TB 的文件(平均 90 kb)存储为 varbinary,我需要尽快将每个文件提取到本地硬盘驱动器。 BCP 似乎可以工作,但以我看到的速度需要 45 天以上,而且我担心我的脚本会在某个时候失败,因为 Management Studio 会耗尽内存。

sql sql-server-2008-r2 blob
5个回答
37
投票

我尝试使用 CLR 函数,它的速度比 BCP 快两倍多。这是我的代码。

原始方法

SET @bcpCommand = 'bcp "SELECT blobcolumn FROM blobtable WHERE ID = ' + CAST(@FileID AS VARCHAR(20)) + '" queryout "' + @FileName + '" -T -c'
EXEC master..xp_cmdshell @bcpCommand

CLR 方法:

declare @file varbinary(max) = (select blobcolumn from blobtable WHERE ID = @fileid)
declare @filepath nvarchar(4000) = N'c:\temp\' + @FileName
SELECT Master.dbo.WriteToFile(@file, @filepath, 0)

CLR 函数的 C# 代码

using System;
using System.Data;
using System.Data.SqlTypes;
using System.IO;
using Microsoft.SqlServer.Server;

namespace BlobExport
{
    public class Functions
    {
      [SqlFunction]
      public static SqlString WriteToFile(SqlBytes binary, SqlString path, SqlBoolean append)
      {        
        try
        {
          if (!binary.IsNull && !path.IsNull && !append.IsNull)
          {         
            var dir = Path.GetDirectoryName(path.Value);           
            if (!Directory.Exists(dir))              
              Directory.CreateDirectory(dir);            
              using (var fs = new FileStream(path.Value, append ? FileMode.Append : FileMode.OpenOrCreate))
            {
                byte[] byteArr = binary.Value;
                for (int i = 0; i < byteArr.Length; i++)
                {
                    fs.WriteByte(byteArr[i]);
                };
            }
            return "SUCCESS";
          }
          else
             "NULL INPUT";
        }
        catch (Exception ex)
        {          
          return ex.Message;
        }
      }
    }
}

23
投票

我来到这里寻找以最少的努力将 blob 导出到文件中。 CLR 函数并不是我所说的最省力的函数。 这里描述了更懒的一种,使用OLE自动化:

declare @init int
declare @file varbinary(max) = CONVERT(varbinary(max), N'your blob here')
declare @filepath nvarchar(4000) = N'c:\temp\you file name here.txt'

EXEC sp_OACreate 'ADODB.Stream', @init OUTPUT; -- An instace created
EXEC sp_OASetProperty @init, 'Type', 1; 
EXEC sp_OAMethod @init, 'Open'; -- Calling a method
EXEC sp_OAMethod @init, 'Write', NULL, @file; -- Calling a method
EXEC sp_OAMethod @init, 'SaveToFile', NULL, @filepath, 2; -- Calling a method
EXEC sp_OAMethod @init, 'Close'; -- Calling a method
EXEC sp_OADestroy @init; -- Closed the resources

您可能需要允许在服务器上运行 OA 存储过程(然后在完成后将其关闭):

sp_configure 'show advanced options', 1;  
GO  
RECONFIGURE;  
GO  
sp_configure 'Ole Automation Procedures', 1;  
GO  
RECONFIGURE;  
GO

4
投票

对我来说,结合我读过的所有帖子的效果是:

1.启用 OLE 自动化 - 如果未启用

sp_configure 'show advanced options', 1;  
GO  
RECONFIGURE;  
GO  
sp_configure 'Ole Automation Procedures', 1;  
GO  
RECONFIGURE;  
GO

2.创建一个用于存储生成文件的文件夹:

C:\GREGTESTING

3.创建用于文件生成的 DocTable 并将 blob 存储在 Doc_Content 中

CREATE TABLE [dbo].[Document](
    [Doc_Num] [numeric](18, 0) IDENTITY(1,1) NOT NULL,
    [Extension] [varchar](50) NULL,
    [FileName] [varchar](200) NULL,
    [Doc_Content] [varbinary](max) NULL   
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY] 

INSERT [dbo].[Document] ([Extension] ,[FileName] , [Doc_Content] )
    SELECT 'pdf', 'SHTP Notional hire - January 2019.pdf', 0x....(varbinary blob)

重要提示!

不要忘记在 Doc_Content 列中添加您要生成的文件的 varbinary!

4.运行以下脚本

DECLARE @outPutPath varchar(50) = 'C:\GREGTESTING'
, @i bigint
, @init int
, @data varbinary(max) 
, @fPath varchar(max)  
, @folderPath  varchar(max)

--Get Data into temp Table variable so that we can iterate over it 
DECLARE @Doctable TABLE (id int identity(1,1), [Doc_Num]  varchar(100) , [FileName]  varchar(100), [Doc_Content] varBinary(max) )



INSERT INTO @Doctable([Doc_Num] , [FileName],[Doc_Content])
Select [Doc_Num] , [FileName],[Doc_Content] FROM  [dbo].[Document]



SELECT @i = COUNT(1) FROM @Doctable   

WHILE @i >= 1   

BEGIN    

SELECT 
    @data = [Doc_Content],
    @fPath = @outPutPath + '\' + [Doc_Num] +'_' +[FileName],
    @folderPath = @outPutPath + '\'+ [Doc_Num]
FROM @Doctable WHERE id = @i

EXEC sp_OACreate 'ADODB.Stream', @init OUTPUT; -- An instace created
EXEC sp_OASetProperty @init, 'Type', 1;  
EXEC sp_OAMethod @init, 'Open'; -- Calling a method
EXEC sp_OAMethod @init, 'Write', NULL, @data; -- Calling a method
EXEC sp_OAMethod @init, 'SaveToFile', NULL, @fPath, 2; -- Calling a method
EXEC sp_OAMethod @init, 'Close'; -- Calling a method
EXEC sp_OADestroy @init; -- Closed the resources
print 'Document Generated at - '+  @fPath   

--Reset the variables for next use
SELECT @data = NULL  
, @init = NULL
, @fPath = NULL  
, @folderPath = NULL
SET @i -= 1
END   

5.结果如下图所示:


0
投票
#region Help:  Introduction to the script task
/* The Script Task allows you to perform virtually any operation that can be accomplished in
 * a .Net application within the context of an Integration Services control flow. 
 * 
 * Expand the other regions which have "Help" prefixes for examples of specific ways to use
 * Integration Services features within this script task. */
#endregion


#region Namespaces
using System;
using System. Data;
using Microsoft.SqlServer.Dts.Runtime;
using System.Windows.Forms;
using System.IO;
using System.Data.OleDb;
using System.Data.SqlClient;
using System.Data.SqlTypes;
#endregion

namespace ST_d4b314af29e4467f9d040056aad2fa70
{
    /// <summary>
    /// ScriptMain is the entry point class of the script.  Do not change the name, attributes,
    /// or parent of this class.
    /// </summary>
    [Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
    public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
    {
        #region Help:  Using Integration Services variables and parameters in a script
        /* To use a variable in this script, first ensure that the variable has been added to 
         * either the list contained in the ReadOnlyVariables property or the list contained in 
         * the ReadWriteVariables property of this script task, according to whether or not your
         * code needs to write to the variable.  To add the variable, save this script, close this instance of
         * Visual Studio, and update the ReadOnlyVariables and 
         * ReadWriteVariables properties in the Script Transformation Editor window.
         * To use a parameter in this script, follow the same steps. Parameters are always read-only.
         * 
         * Example of reading from a variable:
         *  DateTime startTime = (DateTime) Dts.Variables["System::StartTime"].Value;
         * 
         * Example of writing to a variable:
         *  Dts.Variables["User::myStringVariable"].Value = "new value";
         * 
         * Example of reading from a package parameter:
         *  int batchId = (int) Dts.Variables["$Package::batchId"].Value;
         *  
         * Example of reading from a project parameter:
         *  int batchId = (int) Dts.Variables["$Project::batchId"].Value;
         * 
         * Example of reading from a sensitive project parameter:
         *  int batchId = (int) Dts.Variables["$Project::batchId"].GetSensitiveValue();
         * */

        #endregion

        #region Help:  Firing Integration Services events from a script
        /* This script task can fire events for logging purposes.
         * 
         * Example of firing an error event:
         *  Dts.Events.FireError(18, "Process Values", "Bad value", "", 0);
         * 
         * Example of firing an information event:
         *  Dts.Events.FireInformation(3, "Process Values", "Processing has started", "", 0, ref fireAgain)
         * 
         * Example of firing a warning event:
         *  Dts.Events.FireWarning(14, "Process Values", "No values received for input", "", 0);
         * */
        #endregion

        #region Help:  Using Integration Services connection managers in a script
        /* Some types of connection managers can be used in this script task.  See the topic 
         * "Working with Connection Managers Programmatically" for details.
         * 
         * Example of using an Adnet connection manager:
         *  object rawConnection = Dts.Connections["Sales DB"].AcquireConnection(Dts.Transaction);
         *  SqlConnection myADONETConnection = (SqlConnection)rawConnection;
         *  //Use the connection in some code here, then release the connection
         *  Dts.Connections["Sales DB"].ReleaseConnection(rawConnection);
         *
         * Example of using a File connection manager
         *  object rawConnection = Dts.Connections["Prices.zip"].AcquireConnection(Dts.Transaction);
         *  string filePath = (string)rawConnection;
         *  //Use the connection in some code here, then release the connection
         *  Dts.Connections["Prices.zip"].ReleaseConnection(rawConnection);
         * */
        #endregion


        /// <summary>
        /// This method is called when this script task executes in the control flow.
        /// Before returning from this method, set the value of Dts.TaskResult to indicate success or failure.
        /// To open Help, press F1.
        /// </summary>
        public void Main()
        {
            FileStream fs;
            BinaryWriter bw;
            int bufferSize = 100;
            byte[] outbyte = new byte[bufferSize];
            long retval;
            long startIndex = 0;
            String Path = "";
            string filename = "";
            SqlBytes Binary;
            
            SqlConnection cn = new SqlConnection("Data Source=DESKTOP-3D2QK6P;Initial Catalog=Test;Integrated Security=SSPI;");
            SqlCommand logoCMD = new SqlCommand("SELECT Folder_name,FileName,Doc_Content FROM dbo.document", cn);
            cn.Open();
            SqlDataReader myReader = logoCMD.ExecuteReader(CommandBehavior.Default);
           

            while (myReader.Read())
            {
                Path = myReader.GetString(0);
                filename = myReader.GetString(1);
                Binary = myReader.GetSqlBytes(2);
                
                if (!Binary.IsNull && Path!="" && filename!="")
                {
                    var dir = System.IO.Path.GetDirectoryName(Path);
                    if (!Directory.Exists(dir))
                    {
                        Directory.CreateDirectory(dir);
                    }
                    
                        fs = new FileStream(Path+"\\"+filename+".pdf", FileMode.OpenOrCreate, FileAccess.Write);
                        bw = new BinaryWriter(fs);
                        startIndex = 0;
                        retval = myReader.GetBytes(2, startIndex, outbyte, 0, bufferSize);
                        while (retval == bufferSize)
                        {
                            bw.Write(outbyte);
                            bw.Flush();

                            // Reposition the start index to the end of the last buffer and fill the buffer.
                            startIndex += bufferSize;
                            retval = myReader.GetBytes(2, startIndex, outbyte, 0, bufferSize);
                        }

                        // Write the remaining buffer.
                        bw.Write(outbyte, 0, (int)retval - 1);
                        bw.Flush();

                        // Close the output file.
                        bw.Close();
                        fs.Close();
                    }
                

                
            }
            // Close the reader and the connection.
            myReader.Close();
            cn.Close();
        }

        

        #region ScriptResults declaration
        /// <summary>
        /// This enum provides a convenient shorthand within the scope of this class for setting the
        /// result of the script.
        /// 
        /// This code was generated automatically.
        /// </summary>
        enum ScriptResults
        {
            Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
            Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
        };
        #endregion

    }
}

0
投票

我正在寻找一种方法将几千张图像导出到

E:\StudentPhotosNew
,每个图像都是
StudentRef.jpg
,它可以每晚运行它,并且只导出那些尚未导出的图像并避免使用光标,所以我创建了一个基于脚本在上面的示例中,对我来说,第一次运行需要 22 秒,之后由于跳过现有文件,速度会更快。

在此示例中,我正在导出学生照片,但如果您修改 #Files 临时表查询以适合您的表结构,则图像可以是任何内容,因为我保留了其余代码的通用性。

SET XACT_ABORT, NOCOUNT ON;

DECLARE @Message NVARCHAR(MAX);
SET @Message = N'Starting Operation at ' + FORMAT ( SYSDATETIME(), 'dd/MM/yyyy HH:mm:ss' );
RAISERROR ( @Message, 0, 1 ) WITH NOWAIT;

DECLARE @AcademicYear VARCHAR(5) = '23/24'
DECLARE @FolderPath VARCHAR(250) = 'E:\StudentPhotosNew'


DECLARE @FileID INT
DECLARE @FileData VARBINARY(MAX)
DECLARE @FilePath VARCHAR(MAX)


DECLARE @CheckExistingFile INT
DECLARE @ExistingFile INT
DECLARE @FileObject INT

DECLARE @NewFilesExported INT = 0
DECLARE @ExistingFilesAlreadyExported INT = 0

--Temporary table to hold list of files to export
DROP TABLE IF EXISTS #Files
SELECT
    FileID =
        ROW_NUMBER () OVER (
            ORDER BY
                SD.RefNo
        ),
    FileName = RTRIM ( SD.RefNo ) + '.jpg',
    FileData = PHO.Photo,
    FilePath = 
        @FolderPath
        + CASE
            WHEN RIGHT ( @FolderPath, 1 ) = '\' THEN ''
            ELSE '\'
        END
        + RTRIM ( SD.RefNo ) + '.jpg',
    IsExported = CAST ( 0 AS BIT )
    INTO #Files
FROM StudentDetail SD
INNER JOIN StudentPhoto PHO
    ON PHO.StudentID = SD.StudentID
WHERE
    SD.AcademicYearID = @AcademicYear


--Check for records still to export and export 1 by 1
WHILE (
    SELECT
        RecordsStillToExport = COUNT ( F.FileID )
    FROM #Files F
    WHERE
        F.IsExported = 0
) > 0
BEGIN
    SELECT
        @FileID = F.FileID,
        @FileData = F.FileData,
        @FilePath = F.FilePath
    FROM #Files F
    WHERE
        F.FileID = ( SELECT MIN ( F.FileID ) FROM #Files F WHERE F.IsExported = 0 )


    --Check if file exists already - @CheckExistingFile will be 0
    EXEC @CheckExistingFile = sp_OACreate 'Scripting.FileSystemObject', @FileObject OUTPUT
    EXEC @CheckExistingFile = sp_OAMethod @FileObject, 'GetFile', @ExistingFile OUT, @FilePath
    EXEC sp_OADestroy @FileObject; -- Destroy Object
    
    --Export file if not already exported
    IF @CheckExistingFile <> 0
    BEGIN
        EXEC sp_OACreate 'ADODB.Stream', @FileObject OUTPUT; -- Create Object
        EXEC sp_OASetProperty @FileObject, 'Type', 1;
        EXEC sp_OAMethod @FileObject, 'Open';
        EXEC sp_OAMethod @FileObject, 'Write', NULL, @FileData;
        EXEC sp_OAMethod @FileObject, 'SaveToFile', NULL, @FilePath, 2;
        EXEC sp_OAMethod @FileObject, 'Close';
        EXEC sp_OADestroy @FileObject; -- Destroy Object

        SET @NewFilesExported += 1
    END
    ELSE
    BEGIN
        SET @ExistingFilesAlreadyExported += 1
    END

    UPDATE F
    SET
        F.IsExported = 1
    FROM #Files F
    WHERE
        F.FileID = @FileID
END

SET @Message = CAST ( @NewFilesExported AS NVARCHAR(50) ) + N' New Files Exported' + CHAR(13) + CHAR(10) + CAST ( @ExistingFilesAlreadyExported AS NVARCHAR(50) ) + N' Existing Files Already Exported';
RAISERROR ( @Message, 0, 1 ) WITH NOWAIT;

SET @Message = N'Completing Operation at ' + FORMAT ( SYSDATETIME(), 'dd/MM/yyyy HH:mm:ss' );
RAISERROR ( @Message, 0, 1 ) WITH NOWAIT;
© www.soinside.com 2019 - 2024. All rights reserved.