Custom dense layer

The snippet below subclasses tf.keras.layers.Layer to build a dense layer with an extra trainable weight ("x1") added to the kernel, wires it into a small Sequential model, and trains it with a manual GradientTape loop while printing the gradient of every trainable weight.

import tensorflow as tf

class DenseLayer(tf.keras.layers.Layer):
    def __init__(self, units=1):
        super(DenseLayer, self).__init__()
        self.units = units

    def build(self, input_shape):
        # Weights are created lazily once the input shape is known.
        self.w = self.add_weight(name="kernel",
                                 shape=(int(input_shape[-1]), self.units),
                                 trainable=True)
        self.b = self.add_weight(name="bias",
                                 shape=(self.units,),
                                 initializer="zeros",
                                 trainable=True)
        # Extra trainable weight that is added to the kernel in call(), so its gradients can be inspected.
        self.bits = self.add_weight(name="x1",
                                    shape=(int(input_shape[-1]), self.units),
                                    initializer="ones",
                                    trainable=True)

    def call(self, inputs):
        # Effective kernel is w + bits + 1; ReLU is applied to the affine output.
        return tf.nn.relu(tf.matmul(inputs, self.w + self.bits + 1.0) + self.b)
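
To check that the layer builds its weights and produces the expected output shape, here is a minimal standalone sketch (the batch size of 4 and feature size of 1 are arbitrary illustration values, not part of the original snippet):

# Quick sanity check of the custom layer on its own.
sample = tf.random.normal((4, 1))
layer = DenseLayer(units=1)
out = layer(sample)                       # first call triggers build()
print(out.shape)                          # -> (4, 1)
print([w.name for w in layer.weights])    # kernel, bias and the extra "x1" weight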

dense_layer = DenseLayer(1)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1,1)))
model.add(dense_layer)
model.add(tf.keras.layers.Dense(2, activation='softmax'))
model.summary()

# Toy dataset: 50 random samples of shape (1, 1) with integer labels in {0, 1}, in batches of 2.
dataset = tf.data.Dataset.from_tensor_slices(
    (tf.random.normal((50, 1, 1)),
     tf.random.uniform((50,), maxval=2, dtype=tf.int32))).batch(2)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Manual training loop: compute the loss under a GradientTape, inspect the gradients, then apply them.
for i, (x_batch_train, y_batch_train) in enumerate(dataset):
    with tf.GradientTape() as tape:
        y = model(x_batch_train, training=True)
        loss = loss_fn(y_batch_train, y)
    grads = tape.gradient(loss, model.trainable_variables)
    for g in grads:
        # Prints every gradient, including those of the custom layer's "x1" weight.
        print(g)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
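
After the loop, the trained model can be queried directly; a minimal inference sketch (the single random test sample is just for illustration):

# Run the trained model on one new sample and report the most likely class.
x_test = tf.random.normal((1, 1, 1))
probs = model(x_test, training=False)
print("probabilities:", probs.numpy())
print("predicted class:", int(tf.argmax(probs, axis=-1)[0]))
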
Jirayu Kaewprateep