使用boto3和Flask上传文件时,我遇到了一个非常奇怪的问题。如果上传标准的txt文件,一切正常;但如果上传其他任何文件类型(例如zip、exe或pdf),文件就会被更改,即文件大小、类型和哈希值都与原始文件不同。
下面举个例子:我上传了putty.exe,得到的输出如下:
{
"file": "putty.exe",
"message": "success",
"path": "/tmp/putty.exe",
"s3": "",
"sha1": "9b36616702410e029857bc2dd3b7f2f0d7db187a",
"sha256": "5e3dfdce6cefb28f3cafe506a657a5ff44e199c3c821cbd587906f5aea435d10",
"size": "1.7 MB",
"type": "MS-DOS executable"
}
putty.exe正确的SHA-1是73016558c8353509b15cd757063816369e9abfa7
并且在本地通过flask应用测试时,文件大小和信息正确。
本地测试的输出。
{
"file": "putty.exe",
"message": "success",
"path": "/tmp/putty.exe",
"s3": "",
"sha1": "73016558c8353509b15cd757063816369e9abfa7",
"sha256": "736330aaa3a4683d3cc866153510763351a60062a236d22b12f4fe0f10853582",
"size": "1.0 MB",
"type": "PE32 executable (GUI) Intel 80386, for MS Windows"
}
这是我当前的测试代码。
import os
import boto3
import yara
import tempfile
import hashlib
import pefile
import uuid
import magic
from flask import Flask, request, redirect, send_file, url_for, jsonify
from werkzeug.utils import secure_filename
# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)
# Name of the S3 bucket used for uploads and downloads (placeholder value).
bucket_name = "<Bucket name>"
# File size
def convert_bytes(num):
    """Format a byte count as a human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit (TB) is reached, then rendered with one decimal place.

    Args:
        num: Size in bytes.

    Returns:
        A string such as ``"1.7 MB"``. Values of 1024 TB or more are still
        reported in TB rather than falling off the end of the loop and
        returning ``None``.
    """
    units = ('bytes', 'KB', 'MB', 'GB', 'TB')
    for unit in units[:-1]:
        if num < 1024.0:
            return "%3.1f %s" % (num, unit)
        num /= 1024.0
    # Largest unit: always return here so callers never receive None.
    return "%3.1f %s" % (num, units[-1])
def file_size(file_path):
    """Return the human-readable size of the file at *file_path*.

    The size is read with ``os.stat`` and formatted by ``convert_bytes``.
    When the path is not an existing regular file, ``None`` is returned.
    """
    if not os.path.isfile(file_path):
        return None
    size_in_bytes = os.stat(file_path).st_size
    return convert_bytes(size_in_bytes)
def get_hash(filename):
    """Compute the MD5, SHA-1, SHA-256 and SHA-512 digests of a file.

    The file is opened in binary mode and read once in 8 KiB chunks, each
    chunk being fed to all four hash objects.

    Args:
        filename: Path of the file to hash.

    Returns:
        A 4-tuple ``(md5, sha1, sha256, sha512)`` of lowercase hex strings.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digests = (
        hashlib.md5(),
        hashlib.sha1(),
        hashlib.sha256(),
        hashlib.sha512(),
    )
    # The context manager closes the handle even if a read fails; the
    # original left the file open.
    with open(filename, 'rb') as fh:
        for chunk in iter(lambda: fh.read(8192), b''):
            for digest in digests:
                digest.update(chunk)
    return tuple(digest.hexdigest() for digest in digests)
def process_file(file):
    """Download the S3 object named *file* into the system temp directory.

    The object is fetched from the bucket named by the module-level
    ``bucket_name`` and written under the same name inside
    ``tempfile.gettempdir()``.

    Returns:
        ``True`` once the download call has returned; any exception raised
        by the download propagates to the caller.
    """
    s3 = boto3.resource('s3')
    destination = os.path.join(tempfile.gettempdir(), file)
    bucket = s3.Bucket(bucket_name)
    bucket.download_file(file, destination)
    return True
@app.route("/")
def main():
    """Serve the root URL with a fixed JSON-formatted greeting string."""
    greeting = '{"messasge" : "Hello world"}'
    return greeting
@app.route("/realtimescan", methods=['POST'])
def realtimescan():
    """Upload the posted file to S3, download it again and report on it.

    The multipart form field ``file`` is stored in the bucket named by
    ``bucket_name`` under its sanitized filename, fetched back into the
    temp directory via ``process_file``, and then hashed and inspected.

    Returns:
        On success, a dict with the keys ``message``, ``file``, ``path``,
        ``size``, ``type``, ``sha256``, ``sha1`` and ``s3``. On any
        handled failure, ``{"message": "error"}``.

    NOTE(review): this handler passes the upload through unchanged; if the
    reported hashes differ from the original file, check whether something
    in front of the app (e.g. a gateway) alters binary request bodies.
    """
    error = {"message" : "error"}

    upload = request.files['file']
    if not upload:
        # The original could reach ``return ret`` with ``ret`` unbound here.
        return error

    filename = secure_filename(upload.filename)
    if not filename:
        # Sanitizing can strip the name down to nothing; an empty string
        # is not usable as the object key or the temp file name.
        return error

    bucket = boto3.resource('s3').Bucket(bucket_name)
    # ``stream`` is the documented file-like attribute of the upload object.
    bucket.Object(filename).put(Body=upload.stream)

    if not process_file(filename):
        return error

    tmp_path = os.path.join(tempfile.gettempdir(), filename)
    _md5, sha1, sha256, _sha512 = get_hash(tmp_path)

    # File type detection is best-effort: fall back to "na" on failure, but
    # do not swallow KeyboardInterrupt/SystemExit like a bare ``except``.
    try:
        filetype = str(magic.from_file(tmp_path, mime=False))
    except Exception:
        filetype = "na"

    return {
        "message" : "success",
        "file" : filename,
        "path" : tmp_path,
        "size" : file_size(tmp_path),
        "type" : filetype,
        "sha256" : sha256,
        "sha1" : sha1,
        "s3" : "",
    }
if __name__ == "__main__":
    # Start the Flask server only when this file is executed directly.
    app.run()
我不知道问题出在哪里。另外,如果有更好的方法可以在内存中处理文件,而不必先保存到/tmp,也欢迎提供建议和技巧。
谢谢
好的,原来需要在API Gateway的设置中将“二进制媒体类型”(Binary Media Types)设置为*/*,
这样才能接受二进制文件类型。