gcloud ml-engine predict 报错：AbortionError（FAILED_PRECONDITION）

Question

我是 TensorFlow 的新手，正在尝试创建一个模型。该模型在 PyCharm 中运行良好，但是当我使用 gcloud ml-engine 部署模型并进行预测时，却出现了错误。下面是模型文件（model.py）：

#!/usr/bin/env python
"""This file contains all the model information: the training steps, the batch size and the model iself."""

import tensorflow as tf


def get_training_steps():
    """Return how many training steps (batches) the solution is trained for."""
    training_steps = 5000
    return training_steps


def get_batch_size():
    """Return the batch size that will be used by the solution."""
    # The docstring must be indented with the body; at column 0 it ended the
    # (empty) function body and made the module fail with an IndentationError.
    return 50


def solution(features, labels, mode):
    """Model function (``model_fn``) for ``tf.estimator.Estimator``: a small CNN.

    The network is two conv/max-pool stages followed by a dense layer with
    dropout and a 4-unit logits layer.

    Args:
        features: Mapping that contains the input images under the key ``"x"``.
            The tensor is reshaped to ``[-1, 64, 64, 3]``.
        labels: Class labels used for the loss and the accuracy metric; not
            used in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the PREDICT, TRAIN or EVAL mode.
        (As before, any other mode falls through and returns ``None``.)
    """
    # Input layer: a batch of 64x64 pixel RGB (3 channel) images.
    input_layer = tf.reshape(features["x"], [-1, 64, 64, 3])

    # NOTE: no tf.Session / tf.global_variables_initializer() here. A model_fn
    # only builds the graph; the Estimator creates the session and initialises
    # or restores the variables itself. The session that used to be opened at
    # this point ran the initializer before any layer of the model had been
    # created and was closed again immediately, so it never initialised the
    # model's weights.

    # Convolutional layer 1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=16,
        kernel_size=5,
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer 1 (2x2 window, stride 2)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional layer 2 and pooling layer 2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=32,
        kernel_size=5,
        padding="same",
        activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Dense layers: flatten the pooled feature maps to 16 * 16 * 32 values per image.
    pool2_flat = tf.reshape(pool2, [-1, 16 * 16 * 32])
    dense = tf.layers.dense(inputs=pool2_flat, units=16 * 16 * 32, activation=tf.nn.relu)
    # Dropout is only active while training.
    dropout = tf.layers.dropout(
        inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits layer: one unit per class (4 classes).
    logits = tf.layers.dense(inputs=dropout, units=4)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        # predictions["classes"] holds the predicted class for every example in the batch.
        eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(
                labels=labels, predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

{"error": "Prediction failed: Error during model execution: AbortionError(code=StatusCode.FAILED_PRECONDITION, details=\"Attempting to use uninitialized value conv2d/kernel\n\t [[{{node conv2d/kernel/read}}]]\")"}

下面是task.py文件：

#!/usr/bin/env python
"""This file trains the model upon all data with the arguments it got via the gcloud command"""

from functools import partial
import argparse
import json
import os

import numpy as np
import tensorflow as tf
from tensorflow.contrib.training.python.training import hparam

import trainer.data as data
import trainer.model as model


def json_serving_input_fn():
    """Build the serving input receiver used for predictions from a JSON request.

    The receiver tensor (key ``'bytes'``) is a 1-D string placeholder named
    ``image_binary``. Every entry is decoded as a 3-channel JPEG, cast to
    float32 and divided by 255, and the batch is handed to the model under the
    feature key ``"x"`` with its static shape set to ``[None, 64, 64, 3]``.
    No resizing is done here, so the images presumably have to be 64x64
    already -- confirm against the client that builds the request.
    """
    encoded_images = tf.placeholder(tf.string, shape=[None], name='image_binary')

    def _decode_rgb(jpeg_bytes):
        # Decode a single JPEG string into an RGB uint8 image.
        return tf.image.decode_jpeg(jpeg_bytes, channels=3)

    decoded = tf.map_fn(_decode_rgb, encoded_images, dtype=tf.uint8)
    scaled = tf.cast(decoded, tf.float32) / 255.
    scaled.set_shape([None, 64, 64, 3])

    model_features = {"x": scaled}
    receiver_tensors = {'bytes': encoded_images}
    return tf.estimator.export.ServingInputReceiver(model_features, receiver_tensors)


# Maps an export format name to the function that builds its serving input receiver.
SERVING_FUNCTIONS = {'JSON': json_serving_input_fn}


def _get_session_config_from_env_var():
    """Returns a tf.ConfigProto instance that has appropriate device_filters set."""

    tf_config = json.loads(os.environ.get('TF_CONFIG', '{}'))

    if (tf_config and 'task' in tf_config and 'type' in tf_config['task'] and
            'index' in tf_config['task']):
        # Master should only communicate with itself and ps
        if tf_config['task']['type'] == 'master':
            return tf.ConfigProto(device_filters=['/job:ps', '/job:master'])
        # Worker should only communicate with itself and ps
        elif tf_config['task']['type'] == 'worker':
            return tf.ConfigProto(device_filters=[
                '/job:ps',
                '/job:worker/task:%d' % tf_config['task']['index']
        ])
    return None


def train_model(params):
    """Train the solution from model.py on the training and test data combined.

    The images and labels under ``data/train/`` and ``data/test/`` are loaded
    and concatenated into a single training set. Evaluation runs on the test
    portion only; its result is not important, since that data was already
    used for training. A final export of the model is registered on the eval
    spec under the name ``exported``.

    Args:
        params: Object providing ``export_format`` (a key of
            ``SERVING_FUNCTIONS``), ``eval_steps`` and ``job_dir`` (used as
            the estimator's model directory).
    """
    train_images, train_targets = data.create_data_with_labels("data/train/")
    test_images, test_targets = data.create_data_with_labels("data/test/")

    # Train on everything: the training set followed by the test set.
    all_images = np.concatenate((train_images, test_images), axis=0)
    all_targets = np.concatenate((train_targets, test_targets), axis=0)

    training_inputs = tf.estimator.inputs.numpy_input_fn(
        x={"x": all_images},
        y=all_targets,
        batch_size=model.get_batch_size(),
        num_epochs=None,
        shuffle=True)

    evaluation_inputs = tf.estimator.inputs.numpy_input_fn(
        x={"x": test_images},
        y=test_targets,
        num_epochs=1,
        shuffle=False)

    serving_input_fn = SERVING_FUNCTIONS[params.export_format]
    final_exporter = tf.estimator.FinalExporter('exported', serving_input_fn)

    training_spec = tf.estimator.TrainSpec(
        training_inputs, max_steps=model.get_training_steps())
    evaluation_spec = tf.estimator.EvalSpec(
        evaluation_inputs,
        steps=params.eval_steps,
        exporters=[final_exporter],
        name='exported_eval')

    estimator_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var()
    ).replace(model_dir=params.job_dir)

    classifier = tf.estimator.Estimator(model_fn=model.solution, config=estimator_config)
    tf.estimator.train_and_evaluate(classifier, training_spec, evaluation_spec)


if __name__ == "__main__":
    # Command-line entry point: parse the job arguments and start training.
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument(
        '--job-dir',
        type=str,
        default='output',
        help='directory to store checkpoints'
    )
    PARSER.add_argument(
        '--eval-steps',
        help='Number of steps to run evaluation for at each checkpoint',
        default=1,
        type=int
    )
    PARSER.add_argument(
        '--export-format',
        help='The input format of the exported SavedModel binary',
        # Only offer formats that have a serving input function. 'CSV' and
        # 'EXAMPLE' used to be accepted here but are not in SERVING_FUNCTIONS,
        # so they passed parsing and then crashed with a KeyError in
        # train_model.
        choices=sorted(SERVING_FUNCTIONS),
        default='JSON'
    )

    ARGS = PARSER.parse_args()
    tf.logging.set_verbosity('INFO')
    # Floor division keeps the value an integer string ('2') on Python 3 too;
    # true division produced '2.0' there.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(tf.logging.__dict__['INFO'] // 10)

    HPARAMS = hparam.HParams(**ARGS.__dict__)
    train_model(HPARAMS)

当我运行 `gcloud ml-engine predict --model <模型名称> --version <版本名称> --json-instances <json 文件路径>` 时，就会出现上面的错误。运行文件并训练模型不会产生任何错误。有人知道该如何解决这个问题吗？

Answer 1

您解决了吗？我在gcloud预测时也遇到了类似的问题。

gcloud ml-engine预测AbortionError Failed_Precondition

问题描述投票：0回答：1

1个回答

最新问题

gcloud ml-engine预测AbortionError Failed_Precondition

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1