我正在出于学习目的实现一个神经网络。
我有以下模板类:
问题是 Parameters 类是模板化的:把它按引用传给优化器的 update 方法时,编译器抱怨虚函数不能是模板;而如果把参数写成非模板的 Parameters,编译器又抱怨它缺少模板实参。
我还尝试用 CRTP 惯用法把策略模式改成静态多态,这样就可以把模板化的参数传给(非虚的)update 方法;但新的问题是,当我在 Parameters 里存放优化器指针时,编译器抱怨该指针所指向的基类本身也必须带模板实参。
#include <cmath>
#include <cstddef>
#include <iostream>
#include <memory>
#include <random>
#include <stdexcept>
#include <string>
#include <vector>
// #include "tensor_algebra.h" here I define alias for Tensor operations
// for avoid using global namespaces
// but for testig you can use the following include
#include <Fastor/Fastor.h>
using size_type = std::size_t;
using namespace Fastor;
// Forward declaration of the templated parameter struct defined below.
template<size_type input_features, size_type output_features>
struct Parameters;
// Base class for all optimizers.
//
// A virtual member function cannot itself be a template, so update() cannot
// take a Parameters<in, out>& directly (that was the original compile error).
// Instead the interface is type-erased: it receives raw pointers into the
// parameter/gradient storage plus element counts, which works for every
// Parameters instantiation while keeping Optimizer a single non-template
// base class that can be stored behind one shared_ptr<Optimizer>.
class Optimizer{
public:
    // In-place update of `weight_count` weights and `bias_count` biases
    // using the matching gradient arrays.
    virtual void update(float* weight, const float* weight_gradient, std::size_t weight_count,
                        float* bias, const float* bias_gradient, std::size_t bias_count) = 0;
    virtual ~Optimizer() = default;
};
template<size_type input_features, size_type output_features>
struct Parameters{
Tensor<float, input_features, output_features> weight;
Tensor<float, input_features, output_features> weight_gradient;
Tensor<float, output_features> bias;
Tensor<float, output_features> bias_gradient;
Parameters(const std::string& initializer = "he")
: bias(0,0) {
std::random_device rd;
std::mt19937 generator(rd());
std::normal_distribution<float> distribution;
switch(initializer){
case "he":
distribution = std::normal_distribution<float>(0, std::sqrt(2.0 / input_features));
break;
case "xavier":
distribution = std::normal_distribution<float>(0, std::sqrt(2.0 / input_features + output_features));
break;
default:
std::cout << "Invalid initializer" << std::endl;
break;
}
for(auto i = 0; i < input_features; ++i){
for(auto j = 0; j < output_features; ++j){
weight(i, j) = distribution(generator);
}
}
}
void update(){
optimizer->update(*this);
}
};
class SGD : public Optimizer{
public:
SGD(float learning_rate) : learning_rate_(learning_rate) {}
void update(Parameters& parameters){
parameters.weight -= learning_rate_ * parameters.weight_gradient;
parameters.bias -= learning_rate_ * parameters.bias_gradient;
}
private:
float learning_rate_;
};
// Fully connected (dense) layer: output = input * weight + bias.
template<size_type input_features, size_type output_features>
class Linear{
public:
    // learning_rate is kept in the signature for caller compatibility; the
    // rate actually applied is the one owned by the optimizer installed via
    // set_optimizer(). Parameters' constructor takes only the initializer
    // name (the original forwarded learning_rate to it, which did not match
    // any Parameters constructor).
    explicit Linear([[maybe_unused]] float learning_rate, const std::string& initializer = "he")
        : parameters(initializer){}
    // Install the type-erased optimizer used by update().
    void set_optimizer(std::shared_ptr<Optimizer> optimizer){
        parameters.optimizer = std::move(optimizer);
    }
    // Forward pass; the bias row is broadcast over the batch via an
    // outer product with a ones vector.
    template<size_type batch_size>
    Tensor<float, batch_size, output_features> forward(const Tensor<float, batch_size, input_features> &input){
        Tensor<float, batch_size> ones(1);
        return matmul(input, parameters.weight) + outer(ones, parameters.bias);
    }
    // Backward pass: stores dL/dW and dL/db in the parameters and returns
    // dL/dinput for the previous layer.
    template<size_type batch_size>
    Tensor<float, batch_size, input_features> backward(
        const Tensor<float, batch_size, output_features> &gradient,
        const Tensor<float, batch_size, input_features>& input
    ){
        // dL/dW = input^T * dL/dy
        parameters.weight_gradient = matmul(transpose(input), gradient);
        // dL/db = column-wise sum of dL/dy over the batch.
        parameters.bias_gradient = 0.0f;
        for (size_type i = 0; i < batch_size; ++i) {
            for (size_type j = 0; j < output_features; ++j){
                parameters.bias_gradient(j) += gradient(i, j);
            }
        }
        // dL/dx = dL/dy * W^T
        return matmul(gradient, transpose(parameters.weight));
    }
    // Convenience: apply one optimizer step to this layer's parameters.
    void update(){
        parameters.update();
    }
private:
    Parameters<input_features, output_features> parameters;
};
int main(){
// Layer mapping 2 input features to 3 output features; the 0.01 passed to
// the constructor mirrors the SGD learning rate below.
Linear<2,3> linear(0.01);
linear.set_optimizer(std::make_shared<SGD>(0.01));
// One batch of two samples, two features each.
Tensor<float, 2, 2> input = {{1, 2}, {3, 4}};
Tensor<float, 2, 3> output = linear.forward(input);
std::cout << output << std::endl;
return 0;
}
我试过这个:
// Attempted CRTP ("static polymorphism") base: the non-virtual update() is a
// member template that forwards to the derived class's update().
template<class Derived>
class Optimizer{
public:
Derived& self(){return static_cast<Derived&>(*this);}
const Derived& self() const {return static_cast<const Derived&>(*this);}
template<size_type input_size, size_type output_size>
// NOTE(review): this member template is legal precisely because it is not
// virtual. The reason the attempt still fails is that the base is now a
// class template: there is no single type "Optimizer" anymore, only
// Optimizer<SGD>, Optimizer<Adam>, ... -- so it cannot be stored behind one
// non-template std::shared_ptr<Optimizer>, which is what set_optimizer()
// and the Parameters member expect.
void update(Parameters<input_size,output_size>& parameters){
self().update(parameters);
}
};
// SGD written against the CRTP base. update() is a member template here,
// which compiles fine on its own; the breakage is at the storage site,
// where a single non-template base pointer type is required.
class SGD : public Optimizer<SGD>{
public:
SGD(float learning_rate) : learning_rate_(learning_rate) {}
template<size_type input_size, size_type output_size>
void update(Parameters<input_size,output_size>& parameters){
parameters.weight -= learning_rate_ * parameters.weight_gradient;
parameters.bias -= learning_rate_ * parameters.bias_gradient;
}
private:
float learning_rate_;
};
但似乎不起作用。有什么建议吗?