Flask Web 应用程序 [Azure] 遍历目录非常慢

Question

我正在开发一个 Flask 应用程序，该应用程序提供来自 Azure Blob 存储容器的静态文件。我遇到了一个性能问题：在应用程序中从一个目录导航到另一个目录的速度非常慢。下面是我的 Flask 路由的相关部分，用于列出目录内容：

from flask import Flask, render_template, redirect, url_for, Response, stream_with_context
import os
from azure.storage.blob import BlobServiceClient

# Flask application object; the routes below are registered on it.
app = Flask(__name__)

# Azure connection details.
# The connection string is read from the AZURE_STORAGE_CONNECTION_STRING
# environment variable; os.getenv returns None when the variable is unset.
connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
service_client = BlobServiceClient.from_connection_string(connection_string)
# All blob lookups in this module go through this single container client.
container_name = 'staticfiles'
container_client = service_client.get_container_client(container_name)

@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def index(path):
    """Route a request path to a file download, a redirect or a listing.

    The last '/'-separated segment decides what happens:

    * it contains a dot -> the path is treated as a file and served;
    * it is non-empty without a dot -> redirect to the same path with a
      trailing slash, so directory URLs have one canonical form;
    * otherwise (root or trailing slash) -> show the directory.
    """
    last_segment = path.rsplit('/', 1)[-1]

    # A dot in the final segment is taken to be a file extension.
    if '.' in last_segment:
        return serve_file(path)

    # Directory-like path that is missing its trailing slash.
    if path and not path.endswith('/'):
        return redirect(url_for('index', path=path + '/'))

    # list_directory_contents() tries index.html before listing entries.
    return list_directory_contents(path)


def serve_file(path):
    """Stream the blob named ``path`` back to the client.

    Returns a streaming ``Response`` whose MIME type is chosen from the
    file extension (``application/octet-stream`` when the extension is
    not in the table), or the tuple ``("File not found", 404)`` when the
    blob does not exist.
    """
    client = container_client.get_blob_client(blob=path)
    if not client.exists():
        return "File not found", 404

    downloader = client.download_blob()

    # MIME type served for each known (lower-case) file extension.
    mime_by_suffix = {
        '.html': 'text/html',
        '.css': 'text/css',
        '.js': 'application/javascript',
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.svg': 'image/svg+xml',
        '.woff': 'font/woff',
        '.woff2': 'font/woff2',
        '.ttf': 'font/ttf',
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.json': 'application/json',
        '.xml': 'application/xml',
    }

    # The extension is lower-cased so that e.g. '.PNG' matches '.png'.
    suffix = os.path.splitext(path)[1].lower()
    mime = mime_by_suffix.get(suffix, 'application/octet-stream')

    # Chunks are forwarded as they arrive instead of buffering the blob.
    return Response(stream_with_context(downloader.chunks()), mimetype=mime)

def list_directory_contents(path):
    """Render the listing page for the virtual directory ``path``.

    If the directory holds an ``index.html`` blob, that file is served
    instead of a listing. Otherwise only the direct children of the
    directory are requested from the container, so the cost of a request
    no longer grows with the number of blobs nested below it.

    Args:
        path: Blob-name prefix of the directory; '' is the container root.

    Returns:
        The result of ``serve_file`` for ``index.html`` when that blob
        exists, otherwise the rendered ``index.html`` template with
        ``path``, ``files`` and ``breadcrumb_parts``.
    """
    # A directory prefix must end with '/', otherwise 'docs' would also
    # match sibling names such as 'docs-old/...'.
    prefix = path if not path or path.endswith('/') else path + '/'

    # Blob names always use '/', so the name is built without os.path.
    index_blob_path = prefix + 'index.html'
    if container_client.get_blob_client(index_blob_path).exists():
        return serve_file(index_blob_path)

    files = []

    # With a delimiter, walk_blobs returns one level only: the blobs
    # directly under the prefix, plus one entry per sub-directory whose
    # name ends with the delimiter. list_blobs, by contrast, enumerates
    # every blob below the prefix.
    for item in container_client.walk_blobs(name_starts_with=prefix, delimiter='/'):
        if _is_ignored(item.name):
            continue

        relative_name = item.name[len(prefix):]
        is_dir = relative_name.endswith('/')
        name = relative_name.rstrip('/')
        if not name:
            # A marker blob named exactly like the directory itself.
            continue

        if is_dir:
            files.append({
                'name': name,
                'mtime': '',  # Directories don't have mtime
                'is_dir': True,
                'size': '',
                'full_path': prefix + name + '/'
            })
        else:
            files.append({
                'name': name,
                'mtime': item.last_modified.strftime('%Y-%m-%d %H:%M:%S'),
                'is_dir': False,
                'size': item.size,
                'full_path': item.name
            })

    # Generate breadcrumbs
    breadcrumb_parts = _generate_breadcrumbs(path)

    return render_template('index.html', path=path, files=files, breadcrumb_parts=breadcrumb_parts)

def _is_ignored(blob_name):
    ignored_blobs = {'static', 'templates', 'app.py', '.git', 'TrainingEnv', '.idea'}  # Update if necessary
    return any(ignored in blob_name for ignored in ignored_blobs)

def _generate_breadcrumbs(path):
    path_parts = path.strip('/').split('/')
    breadcrumb_parts = [{'name': part, 'url': os.path.join('/', *path_parts[:i + 1]) + '/'} for i, part in
                        enumerate(path_parts) if part]
    return breadcrumb_parts

if __name__ == '__main__':
    # Start Flask's built-in server only when this file is run as a script.
    app.run()

我正在使用 Azure 存储帐户和 Web 应用服务。打开 index.html 很快，但遍历目录却非常慢。即使一个目录下只有 2-3 个子目录也很慢。

Answer 1

我遇到了一个性能问题，即在应用程序中从一个目录导航到另一个目录的速度非常慢

缓慢的主要原因似乎是目录列表操作。可以通过在内存中维护目录结构的缓存，避免对同一目录重复调用 Azure Blob 存储。

使用缓存机制将目录列表存储在内存中。这样，可以从缓存中处理对同一目录的后续请求，从而减少对 Azure Blob 存储的调用次数。

以下代码中添加了缓存机制。

代码：

from flask import Flask, render_template, redirect, url_for
import os
from azure.storage.blob import BlobServiceClient
from cachetools import TTLCache

# Flask application object; the routes below are registered on it.
app = Flask(__name__)

# Azure connection details
# NOTE(review): the string below is a placeholder -- supply a real
# connection string (preferably from configuration, not source code).
connection_string = "your-storage conn-string"
service_client = BlobServiceClient.from_connection_string(connection_string)
container_name = 'scm-releases'
container_client = service_client.get_container_client(container_name)

# Cache for directory listings
# Keyed by the directory path used in index(); holds at most 100 entries.
# NOTE(review): ttl=300 is presumably seconds (cachetools' default timer)
# -- confirm against the cachetools documentation.
directory_cache = TTLCache(maxsize=100, ttl=300)  # Adjust maxsize and ttl as needed

@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def index(path):
    """Render the directory listing for ``path``, using the TTL cache.

    The listing is taken from ``directory_cache`` when a live entry
    exists; otherwise it is fetched with ``list_directory_contents`` and
    stored in the cache.

    NOTE(review): every path is treated as a directory here -- this route
    never serves a file's contents.
    """
    path = path.rstrip('/')  # Remove trailing slash to standardize path

    # Single lookup instead of "in" followed by [...]: a TTL entry can
    # expire between the two operations, and the lookup would then raise
    # KeyError.
    try:
        files = directory_cache[path]
    except KeyError:
        files = list_directory_contents(path)
        directory_cache[path] = files

    return render_template('index.html', path=path, files=files, breadcrumb_parts=_generate_breadcrumbs(path))

def list_directory_contents(path):
    """Return the direct children of the virtual directory ``path``.

    Args:
        path: Directory path, with or without a trailing slash; '' is
            the container root.

    Returns:
        A list of dicts with the keys ``name``, ``mtime``, ``is_dir``,
        ``size`` and ``full_path``. Sub-directories have an empty
        ``mtime``/``size`` and a ``full_path`` ending in '/'.
    """
    # The prefix must end with '/', otherwise 'docs' would also match
    # sibling names such as 'docs-old/...'.
    trimmed = path.rstrip('/')
    prefix = trimmed + '/' if trimmed else ''

    files = []

    # With a delimiter, walk_blobs returns one level only: the blobs
    # directly under the prefix, plus one entry per sub-directory whose
    # name ends with the delimiter. list_blobs, by contrast, enumerates
    # every blob below the prefix.
    for item in container_client.walk_blobs(name_starts_with=prefix, delimiter='/'):
        relative_name = item.name[len(prefix):]
        is_dir = relative_name.endswith('/')
        name = relative_name.rstrip('/')
        if not name:  # Skip the base directory itself
            continue

        if is_dir:
            files.append({
                'name': name,
                'mtime': '',  # Directories don't have mtime
                'is_dir': True,
                'size': '',
                'full_path': prefix + name + '/'
            })
        else:
            files.append({
                'name': name,
                'mtime': item.last_modified.strftime('%Y-%m-%d %H:%M:%S'),
                'is_dir': False,
                'size': item.size,
                'full_path': item.name
            })

    return files

def _generate_breadcrumbs(path):
    path_parts = path.strip('/').split('/')
    breadcrumb_parts = [{'name': part, 'url': '/' + '/'.join(path_parts[:i + 1]) + '/'} for i, part in enumerate(path_parts) if part]
    return breadcrumb_parts

if __name__ == '__main__':
    # Runs only when this file is executed directly as a script.
    # NOTE(review): debug=True is meant for local development; confirm it
    # is not enabled in a deployed app.
    app.run(debug=True)  # Run the Flask app in debug mode

index.html：

<!-- templates/index.html -->
<!-- Lists the entries of one directory, followed by the breadcrumb trail. -->
<!DOCTYPE html>
<html>
<head>
    <title>Directory Listing - {{ path }}</title>
</head>
<body>
    <h1>Directory Listing - {{ path }}</h1>
    <ul>
        {# Links are built with url_for so they are absolute: a relative
           href such as "docs/sub/" would resolve to "/docs/docs/sub/"
           when the current page URL is "/docs/". #}
        {% for file in files %}
            <li>
                {% if file.is_dir %}
                    <a href="{{ url_for('index', path=file.full_path) }}">{{ file.name }}/</a>
                {% else %}
                    <a href="{{ url_for('index', path=file.full_path) }}">{{ file.name }}</a>
                {% endif %}
            </li>
        {% endfor %}
    </ul>
    <p>Breadcrumbs:</p>
    <ul>
        {% for breadcrumb in breadcrumb_parts %}
            <li><a href="{{ breadcrumb.url }}">{{ breadcrumb.name }}</a></li>
        {% endfor %}
    </ul>
</body>
</html>

上面的代码运行成功。检查下面：

enter image description here

输出：

enter image description here

`list_directory_contents` 函数现在直接用指定路径下的 Blob 填充 `files` 列表，在获取所有 Blob 后无需进行过滤。这应该可以减少不必要的操作并提高性能。

Flask Web 应用程序 [Azure] 遍历目录非常慢

问题描述投票：0回答：1

1个回答

最新问题

Flask Web 应用程序 [Azure] 遍历目录非常慢

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1