我正在开发一个 Flask 应用程序,该应用程序提供来自 Azure Blob 存储容器的静态文件。我遇到了一个性能问题:在应用程序中从一个目录导航到另一个目录的速度非常慢。下面是我的 Flask 路由中负责列出目录内容的相关部分:
from flask import Flask, render_template, redirect, url_for, Response, stream_with_context
import os
from azure.storage.blob import BlobServiceClient
# Flask application object; the route decorators below register on it.
app = Flask(__name__)

# Azure Blob Storage wiring. The connection string comes from the
# environment, and one container client is created at import time for the
# request handlers to use.
container_name = 'staticfiles'
connection_string = os.environ.get('AZURE_STORAGE_CONNECTION_STRING')
service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = service_client.get_container_client(container_name)
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def index(path):
    """Dispatch a request path to a file download or a directory listing.

    A path whose final segment contains a dot is treated as a file. Any
    other non-empty path without a trailing slash is redirected to its
    trailing-slash form; everything else is handled as a directory.
    """
    last_segment = path.split('/')[-1]

    # A dot in the last segment is taken to mean "has a file extension".
    if '.' in last_segment:
        return serve_file(path)

    # Canonicalise directory URLs so they always end with a slash.
    if path and not path.endswith('/'):
        return redirect(url_for('index', path=path + '/'))

    # Directory request: delegate to the listing helper.
    return list_directory_contents(path)
def serve_file(path):
    """Stream the blob stored at *path* back to the client.

    Returns a streaming ``Response`` whose MIME type is chosen from the file
    extension, or the tuple ``("File not found", 404)`` when the blob does
    not exist.
    """
    blob_client = container_client.get_blob_client(blob=path)
    if not blob_client.exists():
        return "File not found", 404

    downloader = blob_client.download_blob()
    mimetype = _content_type_for(path)
    # Stream chunk by chunk instead of reading the whole blob into memory.
    return Response(stream_with_context(downloader.chunks()), mimetype=mimetype)


def _content_type_for(path):
    """Return the MIME type for *path* based on its lower-cased extension.

    Extensions missing from the table fall back to
    ``application/octet-stream``.
    """
    known_types = {
        '.html': 'text/html',
        '.css': 'text/css',
        '.js': 'application/javascript',
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.svg': 'image/svg+xml',
        '.woff': 'font/woff',
        '.woff2': 'font/woff2',
        '.ttf': 'font/ttf',
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.json': 'application/json',
        '.xml': 'application/xml',
    }
    suffix = os.path.splitext(path)[1].lower()
    return known_types.get(suffix, 'application/octet-stream')
def list_directory_contents(path):
    """Serve ``index.html`` for a directory, or render a listing of it.

    Args:
        path: Directory prefix inside the container ('' for the root).

    Returns:
        The result of ``serve_file`` when ``<path>/index.html`` exists,
        otherwise the rendered ``index.html`` template with one entry per
        immediate child (sub-directory or file) of the directory.
    """
    # Blob names always use '/', so build the prefix by hand rather than with
    # os.path.join, which would insert '\\' on Windows.
    prefix = path if not path or path.endswith('/') else path + '/'

    # Check for index.html in the current directory
    index_blob_path = prefix + 'index.html'
    if container_client.get_blob_client(index_blob_path).exists():
        return serve_file(index_blob_path)

    # walk_blobs with a delimiter returns only the immediate children of the
    # prefix (virtual sub-directories come back as single prefix items whose
    # name ends with '/'). list_blobs would instead enumerate every blob in
    # the whole subtree, which makes navigation slow for deep trees.
    children = container_client.walk_blobs(name_starts_with=prefix, delimiter='/')

    files = []
    directories = set()
    for item in children:
        if _is_ignored(item.name):
            continue
        relative_path = item.name[len(prefix):].lstrip('/')
        if not relative_path:
            # A marker blob named exactly like the prefix is not a child.
            continue
        if '/' in relative_path:
            directory = relative_path.split('/')[0]
            if directory in directories:
                continue
            directories.add(directory)
            files.append({
                'name': directory,
                'mtime': '',  # Directories don't have mtime
                'is_dir': True,
                'size': '',
                'full_path': prefix + directory + '/',
            })
        else:
            files.append({
                'name': relative_path,
                'mtime': item.last_modified.strftime('%Y-%m-%d %H:%M:%S'),
                'is_dir': False,
                'size': item.size,
                'full_path': item.name,
            })

    # Generate breadcrumbs
    breadcrumb_parts = _generate_breadcrumbs(path)
    return render_template('index.html', path=path, files=files, breadcrumb_parts=breadcrumb_parts)
def _is_ignored(blob_name):
ignored_blobs = {'static', 'templates', 'app.py', '.git', 'TrainingEnv', '.idea'} # Update if necessary
return any(ignored in blob_name for ignored in ignored_blobs)
def _generate_breadcrumbs(path):
path_parts = path.strip('/').split('/')
breadcrumb_parts = [{'name': part, 'url': os.path.join('/', *path_parts[:i + 1]) + '/'} for i, part in
enumerate(path_parts) if part]
return breadcrumb_parts
# Start the Flask server only when this file is executed as a script.
if __name__ == '__main__':
    app.run()
我正在使用 Azure 存储帐户和 Web 应用服务。打开 index.html 很快,但遍历目录却非常慢,即使一个目录下只有 2-3 个子目录也很慢。
我遇到了一个性能问题,即在应用程序中从一个目录导航到另一个目录的速度非常慢
缓慢的主要原因似乎是目录列表操作。可以在内存中维护目录结构的缓存,以避免对同一目录重复调用 Azure Blob 存储。
以下代码中添加了缓存机制。
代码:
from flask import Flask, render_template, redirect, url_for
import os
from azure.storage.blob import BlobServiceClient
from cachetools import TTLCache
app = Flask(__name__)

# Azure connection details. The connection string is read from the
# AZURE_STORAGE_CONNECTION_STRING environment variable when it is set, so the
# secret does not have to live in source control; the literal is only the
# placeholder fallback.
connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING', "your-storage conn-string")
service_client = BlobServiceClient.from_connection_string(connection_string)
container_name = 'scm-releases'
container_client = service_client.get_container_client(container_name)

# Cache for directory listings: at most 100 entries, each kept for 300 seconds.
directory_cache = TTLCache(maxsize=100, ttl=300)  # Adjust maxsize and ttl as needed
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def index(path):
    """Render the directory listing for *path*, using the cache when possible.

    The listing is looked up in ``directory_cache`` under the path with its
    trailing slash removed; on a miss it is fetched with
    ``list_directory_contents`` and stored.
    """
    path = path.rstrip('/')  # Remove trailing slash to standardize path

    # One get() instead of `path in cache` followed by `cache[path]`: a TTL
    # entry can expire between those two steps, which would raise KeyError.
    files = directory_cache.get(path)
    if files is None:
        files = list_directory_contents(path)
        directory_cache[path] = files

    return render_template(
        'index.html',
        path=path,
        files=files,
        breadcrumb_parts=_generate_breadcrumbs(path),
    )
def list_directory_contents(path):
    """Return the immediate children of the directory *path* in the container.

    Args:
        path: Directory path without a trailing slash ('' for the root).

    Returns:
        A list of dicts with the keys ``name``, ``mtime``, ``is_dir``,
        ``size`` and ``full_path``; one per sub-directory or file.
    """
    # Terminate the prefix with '/' so that 'a' does not also match 'ab/...',
    # and so that relative names can be cut at exactly len(prefix).
    prefix = path + '/' if path and not path.endswith('/') else path

    # walk_blobs with a delimiter returns only the immediate children of the
    # prefix (virtual sub-directories come back as single prefix items whose
    # name ends with '/'), instead of every blob in the whole subtree.
    children = container_client.walk_blobs(name_starts_with=prefix, delimiter='/')

    files = []
    directories = set()
    for item in children:
        relative_path = item.name[len(prefix):].lstrip('/')
        if not relative_path:  # Skip the base directory itself
            continue
        if '/' in relative_path:
            directory = relative_path.split('/')[0]
            if directory in directories:
                continue
            directories.add(directory)
            files.append({
                'name': directory,
                'mtime': '',  # Directories don't have mtime
                'is_dir': True,
                'size': '',
                # Blob paths always use '/', so do not use os.path.join here.
                'full_path': prefix + directory + '/',
            })
        else:
            files.append({
                'name': relative_path,
                'mtime': item.last_modified.strftime('%Y-%m-%d %H:%M:%S'),
                'is_dir': False,
                'size': item.size,
                'full_path': item.name,
            })
    return files
def _generate_breadcrumbs(path):
path_parts = path.strip('/').split('/')
breadcrumb_parts = [{'name': part, 'url': '/' + '/'.join(path_parts[:i + 1]) + '/'} for i, part in enumerate(path_parts) if part]
return breadcrumb_parts
# Start the Flask server only when this file is executed as a script.
if __name__ == '__main__':
    # NOTE(review): debug mode is meant for local development; confirm it is
    # never enabled on the deployed Web App.
    app.run(debug=True)  # Run the Flask app in debug mode
index.html:
<!-- templates/index.html -->
<!DOCTYPE html>
<html>
<head>
    <title>Directory Listing - {{ path }}</title>
</head>
<body>
    <h1>Directory Listing - {{ path }}</h1>
    {# Entry links are built with url_for so they are absolute. A bare
       relative href would be resolved against the current directory URL and
       duplicate the path (for example /a/ + a/b/ becomes /a/a/b/). #}
    <ul>
        {% for file in files %}
        <li>
            {% if file.is_dir %}
            <a href="{{ url_for('index', path=file.full_path) }}">{{ file.name }}/</a>
            {% else %}
            <a href="{{ url_for('index', path=file.full_path) }}">{{ file.name }}</a>
            {% endif %}
        </li>
        {% endfor %}
    </ul>
    <p>Breadcrumbs:</p>
    <ul>
        {% for breadcrumb in breadcrumb_parts %}
        <li><a href="{{ breadcrumb.url }}">{{ breadcrumb.name }}</a></li>
        {% endfor %}
    </ul>
</body>
</html>
上面的代码运行成功。检查下面:
输出:
`list_directory_contents` 函数现在直接用指定路径下的 Blob 填充 `files` 列表,在获取所有 Blob 后无需再进行过滤。这应该可以减少不必要的操作并提高性能。