tf.GPUOptions不适用于Keras中的set_session()

问题描述 投票:2回答:1

我试图调整tf.GPUOptions()中的per_process_gpu_memory_fraction值,然后用set_session()更换Keras会话,然而,显存占用比例从未真正改变。在第一次运行while循环后,nvidia-smi显示有319MB显存被保留,这部分显存

a)在调用clear_session()时永远不会被释放,并且

b)不会在while循环的下一次迭代中上升。

import GPUtil
import time

import tensorflow as tf
import numpy as np

from keras.backend.tensorflow_backend import set_session, clear_session, get_session
from tensorflow.python.framework.errors_impl import ResourceExhaustedError, UnknownError
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical


def model_trainer():
    """Fit a dense Keras model, raising the GPU memory fraction after each OOM.

    Starts from a deliberately tiny ``per_process_gpu_memory_fraction`` and,
    each time training fails with ``ResourceExhaustedError`` or
    ``UnknownError``, opens a fresh session that requests 5% more of GPU 0.
    The loop ends after the first successful ``fit``/``predict``.

    NOTE(review): the discussion accompanying this script reports that
    TensorFlow keeps the GPU memory it has claimed until the process exits,
    so the larger fraction may never take effect inside a single process.
    Run each attempt in its own process if the new fraction must be honoured.

    Raises:
        ValueError: if the requested fraction reaches the 90% cap without a
            successful training run.
    """
    max_fraction = 0.90   # never request more than this share of the GPU
    mem_step = 0.05       # increment applied after every failed attempt
    retry_delay = 10      # seconds to wait when the GPU is temporarily busy
    mem_amount = 0.005  # intentionally allocated a small amount so it needs to
                        # increment the mem_amount

    total_ram = GPUtil.getGPUs()[0].memoryTotal
    total_ram_allowed = total_ram * max_fraction

    # Placeholder training data: the inputs are uninitialised on purpose
    # (only the memory footprint matters); labels cover all 10 classes
    # (randint's upper bound is exclusive).
    x_train = np.empty((10000, 100))
    y_train = np.random.randint(0, 10, size=10000)
    y_train = to_categorical(y_train, 10)

    y_pred = None
    while y_pred is None:
        print("mem", mem_amount)
        requested_ram = total_ram * mem_amount

        # The cap is a hard limit: waiting cannot help, so fail loudly
        # instead of looping forever.
        if requested_ram >= total_ram_allowed:
            raise ValueError('model too large for vram')

        # Not enough free memory right now (e.g. another job is running):
        # back off instead of busy-spinning, then re-check.
        if GPUtil.getGPUs()[0].memoryFree <= requested_ram:
            time.sleep(retry_delay)
            continue

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_amount)
        config = tf.ConfigProto(
            intra_op_parallelism_threads=2,
            inter_op_parallelism_threads=2,
            gpu_options=gpu_options)

        sess = tf.Session(config=config)
        set_session(sess)
        try:
            model = Sequential()
            model.add(Dense(units=64, activation='relu', input_dim=100))
            model.add(Dense(units=1024, activation='relu'))
            model.add(Dense(units=1024, activation='relu'))
            model.add(Dense(units=1024, activation='relu'))
            model.add(Dense(units=1024, activation='relu'))
            model.add(Dense(units=1024, activation='relu'))
            model.add(Dense(units=10, activation='softmax'))
            model.compile(loss='categorical_crossentropy',
                          optimizer='sgd',
                          metrics=['accuracy'])

            print(sess)

            model.fit(x_train, y_train, epochs=5, batch_size=32)
            y_pred = model.predict(x_train)
        except (ResourceExhaustedError, UnknownError):
            # Out of memory at this fraction: ask for more on the next pass.
            mem_amount += mem_step
        finally:
            # Drop Keras' reference to the session before closing it so a
            # closed session is never left registered as the global one.
            clear_session()
            sess.close()


# Script entry point: run the trainer only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    model_trainer()

令人费解的是,Keras愿意接受新的会话(如get_session()调用所示),但不会应用新的GPUOptions

除了上面的例子我还尝试过:

# NOTE(review): presumably two separate attempts shown back to back, not one
# script -- run literally, the second `del model` would raise NameError.
# Attempt 1 (presumed): clear the Keras session and drop the model reference.
clear_session()
del model
# Attempt 2 (presumed): same as above, plus a forced garbage-collection pass.
clear_session()
del model
gc.collect()

这些方法都没有释放VRAM。

我的总体目标是使用“试验和错误”,直到该过程有足够的VRAM进行训练,因为似乎没有好的方法来确定Keras模型需要多少VRAM而不仅仅运行它,这样我就可以在单个GPU上并行运行多个模型。当ResourceExhaustedError发生时,我想释放由Keras持有的VRAM,然后再尝试使用更多的VRAM。有没有办法实现这个目标?

python tensorflow keras
1个回答
1
投票

在搜索了一段时间之后,我发现Tensorflow只会占用VRAM,并且在进程终止之前永远不会释放它,即使使用del model、clear_session()也是如此。我也试过这里显示的方法(https://github.com/keras-team/keras/issues/9379),它使用:

# Approach taken from keras-team/keras issue #9379: clear the Keras session,
# then close the CUDA context on device 0 through numba.
from keras import backend as K
K.clear_session()

from numba import cuda
cuda.select_device(0)
# NOTE: the accompanying text reports that TensorFlow errors out on its next
# GPU access after this call, because its pointers into GPU memory are no
# longer valid.
cuda.close()

这导致了一个错误,因为当Tensorflow再次尝试访问GPU时,它指向内存空间的指针无效(因为它被cuda.close()杀死)。因此,解决它的唯一方法是使用进程,而不是线程(也尝试过,与以前相同的问题)。

我发现的另一件事是,虽然有一些方法可以尝试估计Keras模型将使用的VRAM数量,但这些方法并不十分准确。(参见:How to determine needed memory of Keras model?)我也尝试直接根据Keras的各层进行计算,结果偏差很大,因此同样不准确。所以实际上只能通过捕获ResourceExhaustedError并重试的方式来反复试错。

下面是我在单个GPU上运行多个不同Keras模型的代码。

import GPUtil
import time
import multiprocessing

import tensorflow as tf
import numpy as np

from keras.backend.tensorflow_backend import set_session, clear_session, get_session
from tensorflow.python.framework.errors_impl import ResourceExhaustedError, UnknownError
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical


def _worker(mem_amount, return_dict, x_train, y_train):
    """Train the model in a child process limited to ``mem_amount`` of the GPU.

    Defined at module level (not nested) so it can be pickled when
    multiprocessing uses the "spawn" start method.

    Args:
        mem_amount: value passed as ``per_process_gpu_memory_fraction``.
        return_dict: manager-backed dict shared with the parent; the key
            ``"valid"`` is set to ``True`` when training completes.
        x_train: training inputs passed to ``model.fit``.
        y_train: one-hot training targets passed to ``model.fit``.
    """
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_amount)
    config = tf.ConfigProto(
        intra_op_parallelism_threads=2,
        inter_op_parallelism_threads=2,
        gpu_options=gpu_options)
    set_session(tf.Session(config=config))

    model = Sequential()
    model.add(Dense(units=64, activation='relu', input_dim=100))
    model.add(Dense(units=1024, activation='relu'))
    model.add(Dense(units=1024, activation='relu'))
    model.add(Dense(units=2048, activation='relu'))
    model.add(Dense(units=10, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    try:
        model.fit(x_train, y_train, epochs=5, batch_size=1000)
    except (ResourceExhaustedError, UnknownError):
        # Not enough GPU memory at this fraction: exit without reporting
        # success so the parent retries with a larger share.
        return

    return_dict["valid"] = True


def model_trainer():
    """Find, by trial and error, a GPU memory fraction that lets training run.

    Every attempt runs in its own child process, so a failed attempt is
    followed by a fresh process that requests 5% more of GPU 0.

    Raises:
        ValueError: if the requested fraction reaches the 90% cap without a
            successful training run.
    """
    max_fraction = 0.90   # never request more than this share of the GPU
    mem_step = 0.05       # increment applied after every failed attempt
    retry_delay = 10      # seconds to wait when the GPU is temporarily busy
    mem_amount = 0.05

    # Placeholder training data: the inputs are uninitialised on purpose
    # (only the memory footprint matters); labels cover all 10 classes
    # (randint's upper bound is exclusive).
    x_train = np.empty((100000, 100))
    y_train = np.random.randint(0, 10, size=100000)
    y_train = to_categorical(y_train, 10)

    # The context manager shuts the manager's server process down on exit.
    with multiprocessing.Manager() as manager:
        return_dict = manager.dict()

        while "valid" not in return_dict:
            print("mem", mem_amount)

            gpu = GPUtil.getGPUs()[0]
            total_ram = gpu.memoryTotal
            total_ram_allowed = total_ram * max_fraction
            requested_ram = total_ram * mem_amount

            # The cap is a hard limit: waiting cannot help, so fail loudly
            # instead of sleeping forever.
            if requested_ram >= total_ram_allowed:
                raise ValueError('model too large for vram')

            # Not enough free memory right now (e.g. other models are still
            # training): wait and re-check.
            if gpu.memoryFree <= requested_ram:
                time.sleep(retry_delay)
                continue

            # can add in a for loop to have multiple models
            p = multiprocessing.Process(
                target=_worker,
                args=(mem_amount, return_dict, x_train, y_train))
            p.start()
            p.join()

            print(return_dict.values())

            if "valid" not in return_dict:
                mem_amount += mem_step


# Script entry point: run the trainer only when this file is executed
# directly. model_trainer() starts multiprocessing.Process children, so the
# guard also keeps a process that merely imports this module from launching
# training again.
if __name__ == "__main__":
    model_trainer()
© www.soinside.com 2019 - 2024. All rights reserved.