使用 Bert Tokenizer NER 继续解析

问题描述 投票:0回答:0
import argparse
import numpy as np
import torch
from transformers import BertForTokenClassification, BertTokenizerFast
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from torch.optim import Adam
from utils import trim_entity_spans, convert_goldparse, ResumeDataset, tag2idx, idx2tag, get_hyperparameters, train_and_val_model


parser = argparse.ArgumentParser(description='Train Bert-NER')
parser.add_argument('-e', type=int, default=5, help='number of epochs')
parser.add_argument('-o', type=str, default='.',
                    help='output path to save model state')

args = parser.parse_args().__dict__

output_path = args['o']

MAX_LEN = 500
EPOCHS = args['e']
MAX_GRAD_NORM = 1.0
MODEL_NAME = 'bert-base-uncased'

TOKENIZER = BertTokenizerFast.from_pretrained(MODEL_NAME, lowercase=True)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data = trim_entity_spans(convert_goldparse("D:\Intern VENV\resume_bert\Resume-NER\data\Resumes.json"))

total = len(data)
train_data, val_data = data[:180], data[180:]

train_d = ResumeDataset(train_data, TOKENIZER, tag2idx, MAX_LEN)
val_d = ResumeDataset(val_data, TOKENIZER, tag2idx, MAX_LEN)

train_sampler = RandomSampler(train_d)
train_dl = DataLoader(train_d, sampler=train_sampler, batch_size=8)

val_dl = DataLoader(val_d, batch_size=4)

model = BertForTokenClassification.from_pretrained(
    MODEL_NAME, num_labels=len(tag2idx))
model.to(DEVICE)
optimizer_grouped_parameters = get_hyperparameters(model, True)
optimizer = Adam(optimizer_grouped_parameters, lr=3e-5)

train_and_val_model(
    model,
    TOKENIZER,
    optimizer,
    EPOCHS,
    idx2tag,
    tag2idx,
    MAX_GRAD_NORM,
    DEVICE,
    train_dl,
    val_dl
)

torch.save(
    {
        "model_state_dict": model.state_dict()
    },
    f'{output_path}/model-state.bin',
)

错误:

esume_bert\Resume-NER\data\Resumes.jsonVENV 错误 = [Errno 22] 无效 参数:'D:\Intern VENV esume_bert\Resume-NER\data\Resumes.json' Traceback(大多数 最近通话最后一次):文件“d:\Intern VENV esume_bert\Resume-NER\utils.py”,第 16 行,在 convert_goldparse 中 with open(dataturks_JSON_FilePath, 'r') as f: OSError: [Errno 22] 无效参数:'D:\Intern VENV esume_bert\Resume-NER\data\Resumes.json' Traceback(大多数 最近通话最后一次):文件“d:\Intern VENV esume_bert\Resume-NER rain.py”,第 28 行,在 data = trim_entity_spans(convert_goldparse("D:\Intern VENV esume_bert\Resume-NER\data\Resumes.json")) 文件 "d:\Intern VENV esume_bert\Resume-NER\utils.py”,第 65 行,在 trim_entity_spans 中 对于文本,数据中的注释:TypeError: 'NoneType' object is not iterable

python parsing bert-language-model named-entity-recognition resume
© www.soinside.com 2019 - 2024. All rights reserved.