我在 Estimator 中使用 TextCNN 模型对一些文本进行分类。模型训练完成后,会以检查点(checkpoint)的形式保存。但是,当我用同一个检查点对同一个测试文件进行预测时,每次得到的预测结果(概率和 logits)都会略有不同。
import tensorflow as tf
def line_parser(line, vocab):
    """Convert one labelled text line into an ``[ids, label]`` pair.

    The line is split on whitespace; its last field is parsed as the integer
    category and the remaining fields are the tokens.

    Args:
        line: Scalar string tensor holding one raw line.
        vocab: Table object whose ``lookup`` method is applied to the tokens.

    Returns:
        A two-element list: the int64 id tensor reshaped to
        ``[FLAGS.max_sequence_length]`` and an int32 one-hot label of depth
        ``FLAGS.num_classes``.
    """

    def _tokens_and_category(raw):
        # Runs as plain Python through tf.py_func; `raw` arrives as bytes.
        fields = raw.decode().strip().split()
        category = int(fields[-1])
        limit = FLAGS.max_sequence_length
        # Slicing is a no-op for short inputs, so it covers the truncation case.
        words = fields[:-1][:limit]
        missing = limit - len(words)
        if missing > 0:
            words = words + [FLAGS.pad_word] * missing
        return [words, category]

    token_tensor, category_tensor = tf.py_func(
        _tokens_and_category, [line], [tf.string, tf.int64]
    )
    # Look the tokens up, then give the id vector a fixed, known length.
    ids = tf.reshape(
        tf.cast(vocab.lookup(token_tensor), tf.int64),
        [FLAGS.max_sequence_length],
    )
    label = tf.one_hot(category_tensor, FLAGS.num_classes, dtype=tf.int32)
    return [ids, label]
def predict_line_parser(line, vocab):
    """Convert one unlabelled text line into a fixed-length id tensor.

    Args:
        line: Scalar string tensor holding one raw line of whitespace-separated
            tokens (no category field).
        vocab: Table object whose ``lookup`` method is applied to the tokens.

    Returns:
        The int64 id tensor reshaped to ``[FLAGS.max_sequence_length]``.
    """

    def _padded_tokens(raw):
        # Runs as plain Python through tf.py_func; `raw` arrives as bytes.
        limit = FLAGS.max_sequence_length
        # Slicing is a no-op for short inputs, so it covers the truncation case.
        words = raw.decode().strip().split()[:limit]
        missing = limit - len(words)
        if missing > 0:
            words = words + [FLAGS.pad_word] * missing
        return [words]

    (token_tensor,) = tf.py_func(_padded_tokens, [line], [tf.string])
    # Look the tokens up, then give the id vector a fixed, known length.
    return tf.reshape(
        tf.cast(vocab.lookup(token_tensor), tf.int64),
        [FLAGS.max_sequence_length],
    )
def train_input_fn(file_paths, batch_size):
    """Build the training dataset from labelled text files.

    Each line is parsed with ``line_parser``; the parsed examples are shuffled
    with a buffer of 1000, batched, and then repeated.

    Args:
        file_paths: File name(s) passed to ``tf.data.TextLineDataset``.
        batch_size: Number of examples per batch.

    Returns:
        The resulting ``tf.data`` dataset.
    """
    # The vocabulary table is created before the dataset pipeline.
    vocab = tf.contrib.lookup.index_table_from_file(FLAGS.vocab_path)
    return (
        tf.data.TextLineDataset(file_paths)
        .map(lambda raw_line: line_parser(raw_line, vocab))
        .shuffle(1000)
        .batch(batch_size)
        .repeat()
    )
def eval_input_fn(file_paths, batch_size):
    """Build the evaluation dataset from labelled text files.

    Each line is parsed with ``line_parser`` and the examples are batched in
    file order; there is no shuffling and no repetition.

    Args:
        file_paths: File name(s) passed to ``tf.data.TextLineDataset``.
        batch_size: Number of examples per batch.

    Returns:
        The resulting ``tf.data`` dataset.
    """
    # The vocabulary table is created before the dataset pipeline.
    vocab = tf.contrib.lookup.index_table_from_file(FLAGS.vocab_path)
    return (
        tf.data.TextLineDataset(file_paths)
        .map(lambda raw_line: line_parser(raw_line, vocab))
        .batch(batch_size=batch_size)
    )
def predict_input_fn(file_paths, batch_size):
    """Build the prediction dataset from unlabelled text files.

    Each line is parsed with ``predict_line_parser`` and the resulting id
    tensors are batched in file order.

    Args:
        file_paths: File name(s) passed to ``tf.data.TextLineDataset``.
        batch_size: Number of examples per batch.

    Returns:
        The resulting ``tf.data`` dataset.
    """
    # The vocabulary table is created before the dataset pipeline.
    vocab = tf.contrib.lookup.index_table_from_file(FLAGS.vocab_path)
    return (
        tf.data.TextLineDataset(file_paths)
        .map(lambda raw_line: predict_line_parser(raw_line, vocab))
        .batch(batch_size=batch_size)
    )
def create_model(features, params):
    """Build the TextCNN forward graph.

    Args:
        features: Tensor of token ids used to index ``params["embedding"]``.
        params: Dict read for the keys ``"embedding"``, ``"filter_sizes"``,
            ``"embedding_size"``, ``"num_filters"``, ``"sequence_length"``
            and ``"num_classes"``.

    Returns:
        A ``(logits, l2_loss)`` tuple, where ``l2_loss`` is the sum of the L2
        losses of the output layer's weight and bias.
    """
    # Embed the id sequence and append a trailing channel axis for conv2d.
    # NOTE(review): params["embedding"] is consumed as-is. If the caller
    # regenerates it randomly on every run instead of restoring it together
    # with the checkpoint, predictions will differ between runs -- confirm
    # how the caller builds it.
    embedded = tf.expand_dims(
        tf.nn.embedding_lookup(params["embedding"], features), axis=-1
    )
    l2_loss = tf.constant(0.0, name="l2_loss", dtype="float64")

    # One convolution + max-pool branch per filter size.
    branch_outputs = []
    for filter_size in params["filter_sizes"]:
        with tf.name_scope("conv_{}".format(filter_size)):
            kernel_shape = [
                filter_size,
                params["embedding_size"],
                1,
                params["num_filters"],
            ]
            kernel = tf.Variable(
                tf.truncated_normal(kernel_shape, stddev=0.1, dtype="float64"),
                name="W",
            )
            bias = tf.Variable(
                tf.constant(0.1, shape=[params["num_filters"]], dtype="float64"),
                name="b",
            )
            conv = tf.nn.conv2d(
                embedded,
                kernel,
                strides=[1, 1, 1, 1],
                padding="VALID",
                use_cudnn_on_gpu=True,
                name="conv",
            )
            activated = tf.nn.relu(tf.nn.bias_add(conv, bias), name="relu")
            # The pooling window spans every valid convolution position.
            branch_outputs.append(
                tf.nn.max_pool(
                    activated,
                    ksize=[1, params["sequence_length"] - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool",
                )
            )

    # Join the branches along the channel axis and flatten per example.
    total_filters = params["num_filters"] * len(params["filter_sizes"])
    flat_features = tf.reshape(tf.concat(branch_outputs, 3), [-1, total_filters])

    with tf.name_scope("dropout"):
        # The keep probability is hard-coded to 1 here;
        # params["dropout_keep_prob"] is not consulted.
        dropped = tf.nn.dropout(flat_features, 1)

    # Fully connected output layer; only its parameters feed the L2 term.
    with tf.name_scope("output"):
        out_weights = tf.Variable(
            tf.truncated_normal(
                shape=[total_filters, params["num_classes"]],
                stddev=0.1,
                dtype="float64",
            ),
            name="W",
        )
        out_bias = tf.Variable(
            tf.constant(0.1, shape=[params["num_classes"]], dtype="float64"),
            name="b",
        )
        for parameter in (out_weights, out_bias):
            l2_loss += tf.nn.l2_loss(parameter)
        logits = tf.nn.xw_plus_b(dropped, out_weights, out_bias, name="scores")

    return logits, l2_loss
def model_fn_builder():
    """Return the Estimator ``model_fn`` for the TextCNN classifier.

    Returns:
        A function ``text_cnn_model_fn(features, labels, mode, params)`` that
        builds the graph with ``create_model`` and returns a
        ``tf.estimator.EstimatorSpec`` for the TRAIN, EVAL or PREDICT mode
        (``None`` for any other mode).
    """

    def _regularized_loss(labels, logits, l2_loss, params):
        # Mean softmax cross-entropy plus the weighted L2 term.
        with tf.name_scope("loss"):
            per_example = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=labels, logits=logits
            )
            return tf.reduce_mean(per_example) + params["l2_reg_lambda"] * l2_loss

    def _class_predictions(logits):
        # Class probabilities and the index of the most probable class.
        with tf.name_scope("prediction"):
            probability = tf.nn.softmax(logits, axis=1, name="probability")
            pred = tf.argmax(probability, axis=1, name="predictions")
        return pred, probability

    def _train_spec(mode, labels, logits, l2_loss, params):
        # Loss plus an Adam update that also advances the global step.
        loss = _regularized_loss(labels, logits, l2_loss, params)
        with tf.name_scope("optimizer"):
            optimizer = tf.train.AdamOptimizer(params["learning_rate"])
            train_op = optimizer.apply_gradients(
                optimizer.compute_gradients(loss),
                global_step=tf.train.get_global_step(),
            )
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    def _eval_spec(mode, labels, logits, l2_loss, params):
        # Loss, streaming metrics, their summaries, and a periodic logging hook.
        loss = _regularized_loss(labels, logits, l2_loss, params)
        pred, _ = _class_predictions(logits)

        metric_builders = (
            ("accuracy", tf.metrics.accuracy),
            ("precision", tf.metrics.precision),
            ("recall", tf.metrics.recall),
        )
        metrics = {}
        with tf.name_scope("metrics"):
            # Labels are one-hot, so argmax recovers the class index.
            for metric_name, build_metric in metric_builders:
                metrics[metric_name] = build_metric(
                    labels=tf.argmax(labels, axis=1), predictions=pred
                )
            # Index 1 of each metric pair is the tensor that is summarised.
            for metric_name, _ in metric_builders:
                tf.summary.scalar(metric_name, metrics[metric_name][1])
            tf.summary.scalar("loss", loss)

        precision_op = metrics["precision"][1]
        recall_op = metrics["recall"][1]
        metric_hook = tf.train.LoggingTensorHook(
            {
                "f1-score": 2 * precision_op * recall_op / (precision_op + recall_op),
                "precision": precision_op,
                "recall": recall_op,
            },
            every_n_iter=100,
        )
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            eval_metric_ops=metrics,
            evaluation_hooks=[metric_hook],
        )

    def _predict_spec(mode, logits):
        # Expose the predicted class and the full probability vector.
        pred, probability = _class_predictions(logits)
        predictions = {
            "class": pred,
            "probability": probability,
        }
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    def text_cnn_model_fn(features, labels, mode, params):
        """Build the TextCNN graph and return the spec for ``mode``."""
        logits, l2_loss = create_model(features, params)
        if mode == tf.estimator.ModeKeys.TRAIN:
            return _train_spec(mode, labels, logits, l2_loss, params)
        if mode == tf.estimator.ModeKeys.EVAL:
            return _eval_spec(mode, labels, logits, l2_loss, params)
        if mode == tf.estimator.ModeKeys.PREDICT:
            return _predict_spec(mode, logits)
        return None

    return text_cnn_model_fn
我希望两次预测的结果完全相同,但实际得到的结果却不一致,如下所示。第一次:0 \ 0.023369161474800530 \ 0.294616048844712430 \ 0.045555230048337241 \ 0.54509338305512280 \ 0.0427279660357330340 \ 0.0327641904848378840 \ 0.115427036158986130 \ 0.126627088128857170 \ 0.016055873445808320 \ 0.006454832043875243;第二次:0 \ 0.033890853416206360 \ 0.315636906539666030 \ 0.061850601655628521 \ 0.58910161843233460 \ 0.071847526293271440 \ 0.043554424310245220 \ 0.162903061665029350 \ 0.172148728640428160 \ 0.024373238862827060 \ 0.0109889405648392
总之,我找到了问题所在:两次预测结果之间的差异,是由每次运行时都重新随机生成的词嵌入向量导致的。