这里我有 Flask 应用程序使用 pickle 数据执行搜索,我的代码可以在本地主机上运行,但不能在 AWS 服务器上运行,它显示属性错误。我发现在 if name == 'main': 下加载 pickle 文件可以解决,但仍然遇到相同的错误
在网页上显示 内部服务器错误
The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.
在控制台上显示以下错误
Traceback (most recent call last):
File "/home/ubuntu/myfit/lib/python3.10/site-packages/flask/app.py", line 2525, in wsgi_app
response = self.full_dispatch_request()
File "/home/ubuntu/myfit/lib/python3.10/site-packages/flask/app.py", line 1822, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/home/ubuntu/myfit/lib/python3.10/site-packages/flask/app.py", line 1820, in full_dispatch_request
rv = self.dispatch_request()
File "/home/ubuntu/myfit/lib/python3.10/site-packages/flask/app.py", line 1796, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
File "/home/ubuntu/myfit/app.py", line 62, in search
docs = pickle.load(file)
AttributeError: Can't get attribute 'Document' on <module '__main__' from '/home/ubuntu/myfit/bin/flask'>
这是代码
from flask import Flask, render_template, request, jsonify
import numpy as np
import json
from sentence_transformers import SentenceTransformer
import faiss
import pickle
app = Flask(__name__, template_folder='templates')
@app.route('/')
def index():
return render_template('index.html')
# Load the JSON data
model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1", device="cpu")
def generate_embedding(text):
response = model.encode([text])
return np.array(response[0])
class VectorStore:
def __init__(self):
self.documents = []
self.embeddings = np.empty((0,384)) # Initialize as empty array
def add_to_store(self, document):
# Append the document to the list of documents
self.documents.append(document)
# Generate the embedding for the document
response = generate_embedding(document.content)
# Concatenate the response with the existing embeddings vertically
self.embeddings = np.vstack((self.embeddings, response))
class Document:
def __init__(self, title, url, content):
self.title = title
self.url = url
self.content = content
@app.route('/search', methods=['POST'])
def search():
query = request.form.get('query')
myresults = []
with open('docs.pkl', 'rb') as file:
docs = pickle.load(file)
store = []
store = VectorStore()
with open('output_data.json', 'r') as f:
myvector = json.load(f)
feature_batch=np.array(myvector)
# Create an index with the same dimension as the embeddings
index = faiss.IndexFlatL2(feature_batch.shape[1])
# Add the embeddings to the index
index.add(feature_batch)
faiss.write_index(index, 'index.faiss')
# Generate embedding for the given query
# Set the similarity threshold
similarityThreshold = 1
# Generate embedding for the given query
query_embedding = generate_embedding(query)
# Search for similar embeddings in the index
distances, results = index.search(np.array([query_embedding]), k=3)
# Filter the results based on the similarity threshold
filtered_results = []
for i, distance in zip(results[0], distances[0]):
if distance <= similarityThreshold:
filtered_results.append(i)
# Print the content of the documents
for i in filtered_results:
myresults.append(docs[i].content)
# Perform a simple search by checking if the query is in any data item
return render_template('index.html', myresults=myresults)
if __name__ == '__main__':
app.run(debug=True)
我确实在 if name == 'main': 下加载了 pickle 文件,但仍然遇到相同的错误
from flask import Flask, render_template, request, jsonify
import numpy as np
import json
from sentence_transformers import SentenceTransformer
import faiss
import pickle
app = Flask(__name__, template_folder='templates')
@app.route('/')
def index():
return render_template('index.html')
# Load the JSON data
model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1", device="cpu")
def generate_embedding(text):
response = model.encode([text])
return np.array(response[0])
class VectorStore:
def __init__(self):
self.documents = []
self.embeddings = np.empty((0,384)) # Initialize as empty array
def add_to_store(self, document):
# Append the document to the list of documents
self.documents.append(document)
# Generate the embedding for the document
response = generate_embedding(document.content)
# Concatenate the response with the existing embeddings vertically
self.embeddings = np.vstack((self.embeddings, response))
class Document:
def __init__(self, title, url, content):
self.title = title
self.url = url
self.content = content
@app.route('/search', methods=['POST'])
def search():
query = request.form.get('query')
myresults = []
store = []
store = VectorStore()
feature_batch=np.array(myvector)
# Create an index with the same dimension as the embeddings
index = faiss.IndexFlatL2(feature_batch.shape[1])
# Add the embeddings to the index
index.add(feature_batch)
faiss.write_index(index, 'index.faiss')
# Generate embedding for the given query
# Set the similarity threshold
similarityThreshold = 1
# Generate embedding for the given query
query_embedding = generate_embedding(query)
# Search for similar embeddings in the index
distances, results = index.search(np.array([query_embedding]), k=3)
# Filter the results based on the similarity threshold
filtered_results = []
for i, distance in zip(results[0], distances[0]):
if distance <= similarityThreshold:
filtered_results.append(i)
# Print the content of the documents
for i in filtered_results:
myresults.append(docs[i].content)
# Perform a simple search by checking if the query is in any data item
return render_template('index.html', myresults=myresults)
if __name__ == '__main__':
with open('docs.pkl', 'rb') as file:
docs = pickle.load(file)
with open('output_data.json', 'r') as f:
myvector = json.load(f)
app.run(debug=True)
我解决了,我必须做简单的改变
if __name__ == '__main__':
app.run(host='0.0.0.0', port=8000)
事实证明,AWS服务器中的flask run和python app.py命令之间存在差异。
我在使用 flask run 运行时遇到了 WSGI 问题。
如果您通过运行 python app.py 或类似工具来启动 python 可执行文件,则 if name == 'main': 块中的任何内容都会被执行而不会产生任何 isuse。