我正在尝试验证一个 Excel 工作表。下面是我的架构(schema)和验证代码。根据文档,只需调用 validate 方法,它就会打印出所有错误,但在我这里不起作用。我已确认架构文件是有效的 JSON 文件。我一直得到以下错误: "
Traceback (most recent call last):
File "/home/user/dev_projects/main2.py", line 57, in <module>
errors = validate_schema(df,schema)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/dev_projects/app_functions/validation.py", line 47, in validate_schema
schema.validate(df, lazy=True)
^^^^^^^^^^^^^^^
AttributeError: 'dict' object has no attribute 'validate'
"
validation snippet
# Validate every worksheet in the workbook against its own schema file
# (<schema_folder>/<sheet_name>.json) and print any validation errors.
for sheet_name in xls.sheet_names:
    print(f'validating {sheet_name}')
    df = pd.read_excel(xls, sheet_name)
    # check if schema exists
    schema_file = os.path.join(schema_folder, f'{sheet_name}.json')
    print(f'the schema file is {schema_file}')
    if not os.path.exists(schema_file):
        print('no schema file found')
        # Nothing to validate against; move on to the next sheet instead of
        # falling through and trying to open a missing file.
        continue
    # load schema
    # json.load() only yields a plain dict, which has no .validate() method
    # (the cause of "AttributeError: 'dict' object has no attribute
    # 'validate'"). Build a real pandera DataFrameSchema instead.
    # NOTE(review): from_json expects pandera's own serialized layout (the
    # one produced by schema.to_json()) and may need the pandera "io" extra
    # installed - confirm the schema files follow that layout.
    try:
        schema = pa.DataFrameSchema.from_json(schema_file)
    except json.JSONDecodeError as e:
        print(f'Error decoding json in {schema_file}: {e.msg} at line {e.lineno}, column {e.colno}')
        # Without a usable schema there is nothing to validate; skipping also
        # avoids reusing a schema left over from a previous sheet.
        continue
    try:
        schema.validate(df, lazy=True)
    except (pa.errors.SchemaErrors, pa.errors.SchemaError) as e:
        # lazy=True collects all failures and raises SchemaErrors (plural);
        # SchemaError is kept for any single, eagerly raised failure.
        print(e)
架构.json
{
"schema_type": "dataframe",
"version": "0.13.4",
"columns": {
"mgmt_vip": {
"dtype": "str",
"nullable": false,
"checks": [
{
"type": "ipv4_address",
"coerce": true,
"error": "Value is not a valid IP Address"
}
],
"unique": true,
"required": true
},
"mgmt_gw": {
"dtype": "str",
"nullable": false,
"checks": [
{
"type": "ipv4_address",
"coerce": true,
"error": "Value is not a valid IP Address"
}
],
"unique": true,
"required": true
},
"ce_name": {
"dtype": "str",
"nullable": false,
"required": true
},
"domain": {
"dtype": "str",
"nullable": false,
"checks": [
{
"type": "condition",
"condition": "lambda x: '.' in x and x.count('.') >= 2 and x[0] != '.' and x[-1] != '.'",
"error": "Value is not an FQDN"
}
],
"unique": true,
"required": true
},
"Location": {
"dtype": "str",
"nullable": false,
"unique": false,
"required": true
},
"Radius1": {
"dtype": "str",
"nullable": false,
"checks": [
{
"type": "ipv4_address",
"coerce": true,
"error": "Value is not a valid IP Address"
}
],
"unique": true,
"required": true
},
"Radius2": {
"dtype": "str",
"nullable": false,
"checks": [
{
"type": "ipv4_address",
"coerce": true,
"error": "Value is not a valid IP Address"
}
],
"unique": true,
"required": true
},
"Radius_Pw": {
"dtype": "str",
"nullable": false,
"unique": false,
"required": true
},
"routing_engine": {
"dtype": "str",
"nullable": false,
"checks": [
{
"type": "isin",
"isin": ["local", "remote"],
"error": "Value must be local or remote"
}
]
},
"Montitoring_IP": {
"dtype": "str",
"nullable": false,
"checks": [
{
"type": "ipv4_address",
"coerce": true,
"error": "Value is not a valid IP Address"
}
]
},
"Rest_User": {
"dtype": "str",
"nullable": false,
"unique": false,
"required": true
},
"Rest_Password": {
"dtype": "str",
"nullable": false,
"unique": false,
"required": true
}
}
}
我在这上面花了几个小时,但无法验证架构,它一直给我一个错误,提示没有验证属性
所以文档显示有一个验证方法并且它有效。
import pandas as pd
import pandera as pa
from pandera import Check, Column, DataFrameSchema
# Minimal example: the schema requires integers in "column" while the frame
# holds strings, so validate() is expected to fail.
schema = DataFrameSchema({"column": Column(int)})
df = pd.DataFrame({"column": ["a", "b", "c"]})
# Show the concrete schema class that provides .validate().
print(type(schema))
schema.validate(df)
OUTPUT
SchemaError: expected series 'column' to have type int64, got object
该模式(schema)的类型是
<class 'pandera.api.pandas.container.DataFrameSchema'>
我正在使用
print(pa.__version__)
0.18.3
那么,如果您检查模式的类型时得到的是
module
就说明您导入的是模块本身,而不是模块中定义的模式(schema)对象。
# Importing the module binds the module object itself, not the schema
# defined inside it.
import altered_scheme
# Inspect the name that was actually imported; its type is `module`.
type(altered_scheme)
OUTPUT
module
试试这个
# Import the schema object from the module rather than the module itself.
from altered_scheme import schema
# Inspect the imported object; it should be a pandera DataFrameSchema.
type(schema)
OUTPUT
pandera.api.pandas.container.DataFrameSchema