AttributeError while working in TensorFlow Federated

Problem description

I am working with the N-BaIoT dataset, from which I selected only the benign, Mirai, and Gafgyt attack data for the Provision_PT_737E_Security_Camera device.

The error is:

AttributeError          Traceback (most recent call last)
<ipython-input-78-ca197d89c56d> in <cell line: 1>()
----> 1 train_metrics = evaluation(metrics.state, federated_train_data)

AttributeError: 'collections.OrderedDict' object has no attribute 'state'

Is my approach correct? And how can I improve what I am doing?

!pip install tensorflow-federated

import numpy as np
import pandas as pd

from google.colab import drive
drive.mount('/content/drive')

import os

benign_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.benign.csv')
g_c_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.combo.csv')
g_j_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.junk.csv')
g_s_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.scan.csv')
g_t_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.tcp.csv')
g_u_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.gafgyt.udp.csv')
m_a_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.ack.csv')
m_sc_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.scan.csv')
m_sy_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.syn.csv')
m_u_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.udp.csv')
m_u_p_df = pd.read_csv('/content/drive/MyDrive/Collab Input dataset/5.mirai.udpplain.csv')

benign_df['type'] = 'benign'
m_u_df['type'] = 'mirai_udp'
g_c_df['type'] = 'gafgyt_combo'
g_j_df['type'] = 'gafgyt_junk'
g_s_df['type'] = 'gafgyt_scan'
g_t_df['type'] = 'gafgyt_tcp'
g_u_df['type'] = 'gafgyt_udp'
m_a_df['type'] = 'mirai_ack'
m_sc_df['type'] = 'mirai_scan'
m_sy_df['type'] = 'mirai_syn'
m_u_p_df['type'] = 'mirai_udpplain'

df = pd.concat([benign_df, m_u_df, g_c_df,
                g_j_df, g_s_df, g_t_df,
                g_u_df, m_a_df, m_sc_df,
                m_sy_df, m_u_p_df],
                axis=0, sort=False, ignore_index=True)

df["type"].value_counts()


from matplotlib import pyplot as plt

plt.title("Class Distribution")
df.groupby("type").size().plot(kind='pie', autopct='%.2f', figsize=(20,10))

df.info()

df = df.sample(frac=1).reset_index(drop=True)

df.head()

import random

num_client = 4

df["client"] = ["client_{}".format(random.randint(1, num_client)) for _ in range(df.shape[0])]

from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df["type"])

features = list(train_df.columns)
features.remove("type")
features.remove("client")


from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
train_df["type"] = label_encoder.fit_transform(train_df["type"])
test_df["type"] = label_encoder.transform(test_df["type"])

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

train_df[features] = train_df[features].astype("float32")
test_df[features] = test_df[features].astype("float32")

train_df["type"] = train_df["type"].astype("int32")
test_df["type"] = test_df["type"].astype("int32")

import nest_asyncio
nest_asyncio.apply()

%load_ext tensorboard

import collections

import numpy as np
import tensorflow as tf
import tensorflow_federated as tff

np.random.seed(0)

tff.federated_computation(lambda: 'Hello, World!')()

client_id_colname = 'client'

client_ids = df[client_id_colname].unique()

train_client_ids = pd.DataFrame(client_ids).sample(frac=0.8).values.ravel().tolist()
test_client_ids = [x for x in client_ids if x not in train_client_ids]

train_client_ids

from collections import OrderedDict
from tensorflow.keras.utils import to_categorical

NUM_EPOCHS = 1
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 5

# NOTE: this reads the module-level `dataframe`, which is re-bound to
# train_df / test_df below before each ClientData object is constructed.
def create_tf_dataset_for_client_fn(client_id):
    client_data = dataframe[dataframe[client_id_colname] == client_id]
    client_data_dict = OrderedDict()
    client_data_dict["features"] = np.array(client_data[features].values, dtype="float32")
    client_data_dict["label"] = np.array(client_data["type"].values, dtype="int32")

    dataset = tf.data.Dataset.from_tensor_slices(client_data_dict)
    dataset = dataset.shuffle(SHUFFLE_BUFFER).batch(1).repeat(NUM_EPOCHS)
    return dataset

dataframe = train_df
train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=train_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

dataframe = test_df
test_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=test_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

train_data.element_type_structure

test_data.element_type_structure

example_dataset = train_data.create_tf_dataset_for_client(train_data.client_ids[0])

example_element = next(iter(example_dataset))

example_element['label'].numpy()

from collections import defaultdict

f = plt.figure(figsize=(20, 10))
f.suptitle('Label Counts for a Sample of Clients')
for i, c_ids in enumerate(train_data.client_ids):
    client_dataset = train_data.create_tf_dataset_for_client(c_ids)
    plot_data = defaultdict(list)
    for example in client_dataset:
        label = example['label'].numpy()[0]
        plot_data[label].append(label)
    plt.subplot(2, 4, i+1)
    plt.title('Client {}'.format(c_ids))
    for j in range(10):
        plt.hist(plot_data[j], density=False, bins=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

import collections

NUM_EPOCHS = 5
BATCH_SIZE = 128
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

def preprocess(dataset):
    def batch_format_fn(element):
        return collections.OrderedDict(x=tf.reshape(element['features'], [-1, len(features)]),
                                       y=tf.reshape(element['label'], [-1, 1]))

    return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(
      BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)

preprocessed_example_dataset = preprocess(example_dataset)

sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_dataset)))

from tqdm import tqdm

def make_federated_data(client_data, client_ids):
    return [preprocess(client_data.create_tf_dataset_for_client(x)) for x in tqdm(client_ids)]

NUM_CLIENTS = len(np.unique(train_df[client_id_colname]))

sample_clients = train_data.client_ids[0:NUM_CLIENTS]

federated_train_data = make_federated_data(train_data, sample_clients)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))

def create_keras_model():
    input_shape = len(features)
    num_classes = len(label_encoder.classes_)
    clf = tf.keras.models.Sequential(
        [
            tf.keras.layers.Dense(64, input_dim=input_shape, activation='relu'),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(num_classes, activation='softmax')
        ])
    return clf

keras_model = create_keras_model()
keras_model.summary()


import keras.backend as K

class F1Score(tf.keras.metrics.Metric):
    def __init__(self, name='F1-Score', **kwargs):
        super(F1Score, self).__init__(name=name, **kwargs)
        self.f1_score = self.add_weight(name='f1_score', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        true_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_true * y_pred, 0, 1)))

        possible_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())

        predicted_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())

        self.f1_score.assign(2*((precision*recall)/(precision+recall+K.epsilon())))

    def result(self):
        return self.f1_score

    def reset_states(self):
        self.f1_score.assign(0.0)


from keras.metrics import Recall, Precision

def model_fn():
    # input_dim matches the number of N-BaIoT feature columns (115).
    keras_model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, input_dim=len(features), activation='relu'),
        tf.keras.layers.Dense(len(features), activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')])
    return tff.learning.models.from_keras_model(
        keras_model=keras_model,
        input_spec=preprocessed_example_dataset.element_spec,  # Define the expected input format
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

keras_model = create_keras_model()
keras_model.summary()

iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn=model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=2))

str(iterative_process.initialize.type_signature)

state = iterative_process.initialize()

state, metrics = iterative_process.next(state, federated_train_data)
print('round  1, metrics={}'.format(metrics))

NUM_ROUNDS = 11
for round_num in range(2, NUM_ROUNDS):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, metrics={}'.format(round_num, metrics))

train_logdir = "training/"
os.makedirs(train_logdir, exist_ok=True)

test_logdir = "testing/"
os.makedirs(test_logdir, exist_ok=True)

summary_writer = tf.summary.create_file_writer(train_logdir)
state = iterative_process.initialize()

with summary_writer.as_default():
    for round_num in range(1, NUM_ROUNDS):
        state, metrics = iterative_process.next(state, federated_train_data)
        client_work_metrics = metrics['client_work']
        for name, value in client_work_metrics['train'].items():
            tf.summary.scalar(name, value, step=round_num)




!ls {train_logdir}
%tensorboard --logdir {train_logdir} --port=0

ModelVariables = collections.namedtuple('ModelVariables', 'weights bias num_examples loss_sum accuracy_sum')

def create_model_variables():
    return ModelVariables(
        weights=tf.Variable(
            lambda: tf.zeros(dtype=tf.float32, shape=(len(features), len(label_encoder.classes_))),
            name='weights',
            trainable=True),
        bias=tf.Variable(
            lambda: tf.zeros(dtype=tf.float32, shape=(len(label_encoder.classes_))),
            name='bias',
            trainable=True),
        num_examples=tf.Variable(0.0, name='num_examples', trainable=False),
        loss_sum=tf.Variable(0.0, name='loss_sum', trainable=False),
        accuracy_sum=tf.Variable(0.0, name='accuracy_sum', trainable=False))

def predict_on_batch(variables, x):
    return tf.nn.softmax(tf.matmul(x, variables.weights) + variables.bias)

def model_forward_pass(variables, batch):
    y = predict_on_batch(variables, batch['x'])
    predictions = tf.cast(tf.argmax(y, 1), tf.int32)

    flat_labels = tf.reshape(batch['y'], [-1])
    loss = -tf.reduce_mean(tf.reduce_sum(tf.one_hot(flat_labels, len(label_encoder.classes_)) * tf.math.log(y), axis=[1]))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, flat_labels), tf.float32))
    num_examples = tf.cast(tf.size(batch['y']), tf.float32)
    variables.num_examples.assign_add(num_examples)
    variables.loss_sum.assign_add(loss * num_examples)
    variables.accuracy_sum.assign_add(accuracy * num_examples)
    return loss, predictions

def get_local_model_metrics(variables):
    return collections.OrderedDict(
        num_examples=variables.num_examples,
        loss=variables.loss_sum / variables.num_examples,
        accuracy=variables.accuracy_sum / variables.num_examples)

@tff.federated_computation
def aggregate_model_metrics_across_clients(metrics):
    return collections.OrderedDict(
        num_examples=tff.federated_sum(metrics.num_examples),
        loss=tff.federated_mean(metrics.loss, metrics.num_examples),
        accuracy=tff.federated_mean(metrics.accuracy, metrics.num_examples))


from typing import Callable, List, OrderedDict

class IOTModel(tff.learning.models.VariableModel):
    def __init__(self):
        self._variables = create_model_variables()

    def reset_metrics(self):
        self._variables.num_examples.assign(0)
        self._variables.loss_sum.assign(0.0)
        self._variables.accuracy_sum.assign(0.0)

    @property
    def trainable_variables(self):
        return [self._variables.weights, self._variables.bias]

    @property
    def non_trainable_variables(self):
        return []

    @property
    def local_variables(self):
        return [
            self._variables.num_examples, self._variables.loss_sum,
            self._variables.accuracy_sum
        ]

    @property
    def input_spec(self):
        return OrderedDict(
            x=tf.TensorSpec([None, len(features)], tf.float32),
            y=tf.TensorSpec([None, 1], tf.int32))

    @tf.function
    def predict_on_batch(self, x, training=True):
        del training
        return predict_on_batch(self._variables, x)

    @tf.function
    def forward_pass(self, batch, training=True):
        del training
        loss, predictions = model_forward_pass(self._variables, batch)
        num_examples = tf.shape(batch['x'])[0]
        return tff.learning.models.BatchOutput(loss=loss, predictions=predictions, num_examples=num_examples)

    @tf.function
    def report_local_outputs(self):
        return get_local_model_metrics(self._variables)

    @property
    def federated_output_computation(self):
        return aggregate_model_metrics_across_clients

    @tf.function
    def report_local_unfinalized_metrics(self) -> OrderedDict[str, List[tf.Tensor]]:
        """Creates an `OrderedDict` of metric names to unfinalized values."""
        return collections.OrderedDict(
            num_examples=[self._variables.num_examples],
            loss=[self._variables.loss_sum, self._variables.num_examples],
            accuracy=[self._variables.accuracy_sum, self._variables.num_examples])

    def metric_finalizers(self) -> OrderedDict[str, Callable[[List[tf.Tensor]], tf.Tensor]]:
        """Creates an `OrderedDict` of metric names to finalizers."""
        return collections.OrderedDict(
            num_examples=tf.function(func=lambda x: x[0]),
            loss=tf.function(func=lambda x: x[0] / x[1]),
            accuracy=tf.function(func=lambda x: x[0] / x[1]))

iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    IOTModel,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.001))

state = iterative_process.initialize()

state, metrics = iterative_process.next(state, federated_train_data)
print('round  1, metrics={}'.format(metrics))

for round_num in range(2, 11):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, metrics={}'.format(round_num, metrics))

evaluation = tff.learning.algorithms.build_weighted_fed_avg(IOTModel)

str(evaluation.get_model_weights)  # Error is here

train_metrics = evaluation(metrics.state, federated_train_data)

str(train_metrics)

NUM_CLIENTS = len(np.unique(test_df[client_id_colname]))

sample_clients = test_data.client_ids[0:NUM_CLIENTS]

federated_test_data = make_federated_data(test_data, sample_clients)

len(federated_test_data), federated_test_data[0]

test_metrics = evaluation(state.model, federated_test_data)
Tags: python, tensorflow, machine-learning, tensorflow-federated, nb-iot
1 Answer

The metrics object returned by iterative_process.next during training is a plain collections.OrderedDict and has no state attribute, so the error is raised on the following line:

train_metrics = evaluation(metrics.state, federated_train_data)
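
To read the training metrics, index into the returned OrderedDict instead, the same pattern the TensorBoard loop in the question already uses. A minimal sketch, assuming the weighted FedAvg process built from IOTModel above:

state, metrics = iterative_process.next(state, federated_train_data)
# metrics is a nested OrderedDict, not an object with a .state attribute.
train_metrics = metrics['client_work']['train']
print(train_metrics['loss'], train_metrics['accuracy'])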

The definition of evaluation likely needs to change as well, because it is built as a training method (FedAvg), not an evaluation method:

evaluation = tff.learning.algorithms.build_weighted_fed_avg(IOTModel)

I suggest following the Federated Learning for Image Classification tutorial, in particular the API usage of tff.learning.algorithms.build_weighted_fed_avg for training and of tff.learning.algorithms.build_fed_eval for evaluation; a sketch of that pattern follows.
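A minimal sketch of that evaluation pattern, assuming the IOTModel training process and the final training state from above, and a federated_test_data list built with make_federated_data:

# Build a dedicated evaluation process instead of reusing a FedAvg training process.
evaluation_process = tff.learning.algorithms.build_fed_eval(IOTModel)
evaluation_state = evaluation_process.initialize()

# Copy the trained weights out of the training state into the evaluation state.
model_weights = iterative_process.get_model_weights(state)
evaluation_state = evaluation_process.set_model_weights(evaluation_state, model_weights)

# Run one round of federated evaluation; metrics come back as a nested OrderedDict.
evaluation_output = evaluation_process.next(evaluation_state, federated_test_data)
print(evaluation_output.metrics)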
