
"""
Collection of Ivy neural network layers as stateful classes.
"""

# local
import ivy
from ivy.neural_net_stateful.module import Module
from ivy.neural_net_stateful.initializers import Zeros, GlorotUniform


# Linear #
# -------#

class Linear(Module):

    def __init__(self, input_channels, output_channels, weight_initializer=GlorotUniform(),
                 bias_initializer=Zeros(), with_bias=True, dev_str=None, v=None):
        """
        Linear layer, also referred to as dense or fully connected. The layer receives tensors with input_channels
        last dimension and returns a new tensor with output_channels last dimension, following matrix multiplication
        with the weight matrix and addition with the bias vector.

        :param input_channels: Number of input channels for the layer.
        :type input_channels: int
        :param output_channels: Number of output channels for the layer.
        :type output_channels: int
        :param weight_initializer: Initializer for the weights. Default is GlorotUniform.
        :type weight_initializer: ivy.Initializer, optional
        :param bias_initializer: Initializer for the bias. Default is Zeros.
        :type bias_initializer: ivy.Initializer, optional
        :param with_bias: Whether or not to include a bias term, default is True.
        :type with_bias: bool, optional
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for the linear layer, as a container, constructed internally by default.
        :type v: ivy container of variables, optional
        """
        self._input_channels = input_channels
        self._output_channels = output_channels
        self._w_shape = (output_channels, input_channels)
        self._b_shape = (output_channels,)
        self._w_init = weight_initializer
        self._b_init = bias_initializer
        self._with_bias = with_bias
        Module.__init__(self, dev_str, v)

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        v = {'w': self._w_init.create_variables(
            self._w_shape, dev_str, self._output_channels, self._input_channels)}
        if self._with_bias:
            v = dict(**v, b=self._b_init.create_variables(self._b_shape, dev_str, self._output_channels))
        return v

    def _forward(self, inputs):
        """
        Perform forward pass of the Linear layer.

        :param inputs: Inputs to process *[batch_shape, in]*.
        :type inputs: array
        :return: The outputs following the linear operation and bias addition *[batch_shape, out]*
        """
        return ivy.linear(inputs, self.v.w, self.v.b if self._with_bias else None)
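# Illustrative usage sketch for Linear (not from the library source itself). It assumes a backend
# framework has already been set for ivy, and that ivy.random_uniform(low, high, shape) is
# available with this positional signature:
#
#   layer = Linear(input_channels=4, output_channels=2)
#   x = ivy.random_uniform(0., 1., (3, 4))    # *[batch_size=3, in=4]*
#   y = layer(x)                              # *[batch_size=3, out=2]*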
# Dropout #
# --------#
class Dropout(Module):

    def __init__(self, prob, scale=True):
        """
        Dropout layer. The layer randomly zeroes some of the elements of the input tensor with probability prob,
        using samples from a Bernoulli distribution.

        :param prob: The probability of zeroing out each array element.
        :type prob: float
        :param scale: Whether to scale the output by 1/(1-prob), default is True.
        :type scale: bool, optional
        """
        self._prob = prob
        self._scale = scale
        Module.__init__(self, None, None)

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        return {}

    def _forward(self, inputs):
        """
        Perform forward pass of the Dropout layer.

        :param inputs: Inputs to process *[batch_shape, in]*.
        :type inputs: array
        :return: The outputs following the dropout operation *[batch_shape, out]*
        """
        return ivy.dropout(inputs, self._prob, self._scale)
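# Minimal Dropout sketch (illustrative, same assumptions as the Linear sketch above). With
# scale=True the surviving elements are multiplied by 1/(1-prob), which keeps the expected
# activation magnitude unchanged:
#
#   dropout = Dropout(prob=0.5)
#   x = ivy.random_uniform(0., 1., (2, 8))
#   y = dropout(x)    # roughly half the elements zeroed, the rest scaled by 1/(1-0.5) = 2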
# Attention #
# ----------#
class MultiHeadAttention(Module):

    def __init__(self, query_dim, num_heads=8, head_dim=64, dropout_rate=0., context_dim=None, scale=None,
                 dev_str=None, v=None, build_mode='on_init'):
        """
        Multi Head Attention layer.

        :param query_dim: The dimension of the attention queries.
        :type query_dim: int
        :param num_heads: Number of attention heads. Default is 8.
        :type num_heads: int, optional
        :param head_dim: The dimension of each of the heads. Default is 64.
        :type head_dim: int, optional
        :param dropout_rate: The rate of dropout. Default is 0.
        :type dropout_rate: float, optional
        :param context_dim: The dimension of the context array. Default is None, in which case the query dim is used.
        :type context_dim: int, optional
        :param scale: The value by which to scale the query-key similarity measure. Default is head_dim^-0.5
        :type scale: float, optional
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for the attention layer, as a container, constructed internally by default.
        :type v: ivy container of variables, optional
        :param build_mode: How the Module is built, either on initialization (now), explicitly by the user by calling
                           build(), or the first time the __call__ method is run. Default is on initialization.
        :type build_mode: str, optional
        """
        v_exists = ivy.exists(v)
        v = ivy.default(v, ivy.Container({'to_q': None, 'to_kv': None, 'to_out': None}))
        self._query_dim = query_dim
        self._inner_dim = head_dim * num_heads
        self._dropout_rate = dropout_rate
        self._context_dim = ivy.default(context_dim, query_dim)
        self._scale = ivy.default(scale, head_dim ** -0.5)
        self._num_heads = num_heads
        ivy.Module.__init__(self, dev_str, v if v_exists else None, build_mode)

    # noinspection PyAttributeOutsideInit
    def _build(self, *args, **kwargs):
        self._to_q = ivy.Linear(self._query_dim, self._inner_dim, with_bias=False, dev_str=self._dev_str)
        self._to_kv = ivy.Linear(self._context_dim, self._inner_dim * 2, with_bias=False, dev_str=self._dev_str)
        self._to_out = ivy.Sequential(
            ivy.Linear(self._inner_dim, self._query_dim, dev_str=self._dev_str),
            ivy.Dropout(self._dropout_rate),
            dev_str=self._dev_str)

    def _forward(self, inputs, context=None, mask=None):
        """
        Perform forward pass of the MultiHeadAttention layer.

        :param inputs: The array to determine the queries from *[batch_shape,num_queries,x_feats]*.
        :type inputs: array
        :param context: The array to determine the keys and values from. Default is None.
                        *[batch_shape,num_values,cont_feats]*.
        :type context: array, optional
        :param mask: The mask to apply to the query-key values. Default is None.
                     *[batch_shape,num_queries,num_values]*
        :type mask: array, optional
        :return: The output following application of scaled dot-product attention.
                 *[batch_shape,num_queries,out_feats]*
        """
        return ivy.multi_head_attention(
            inputs, self._to_q, self._to_kv, self._to_out, self._scale, self._num_heads, context, mask,
            self.v.to_q, self.v.to_kv, self.v.to_out)
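# MultiHeadAttention sketch (illustrative). The inner dimension is num_heads * head_dim = 4 * 16 = 64,
# projected back to query_dim at the output. This sketch assumes Module.__call__ forwards extra
# arguments to _forward, and that context=None falls back to self-attention inside
# ivy.multi_head_attention:
#
#   mha = MultiHeadAttention(query_dim=32, num_heads=4, head_dim=16)
#   x = ivy.random_uniform(0., 1., (2, 10, 32))     # *[batch_shape=2, num_queries=10, x_feats=32]*
#   y = mha(x)                                      # self-attention: *[2, 10, 32]*
#   ctx = ivy.random_uniform(0., 1., (2, 20, 32))   # *[batch_shape=2, num_values=20, cont_feats=32]*
#   y = mha(x, context=ctx)                         # cross-attention: *[2, 10, 32]*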
# Convolutions #
# -------------#
class Conv1D(Module):

    def __init__(self, input_channels, output_channels, filter_size, strides, padding,
                 weight_initializer=GlorotUniform(), bias_initializer=Zeros(), data_format='NWC', dilations=1,
                 dev_str='cpu', v=None):
        """
        1D convolutional layer.

        :param input_channels: Number of input channels for the layer.
        :type input_channels: int
        :param output_channels: Number of output channels for the layer.
        :type output_channels: int
        :param filter_size: Size of the convolutional filter.
        :type filter_size: int
        :param strides: The stride of the sliding window for each dimension of input.
        :type strides: int or sequence of ints
        :param padding: "SAME" or "VALID" indicating the algorithm, or list indicating the per-dimension paddings.
        :type padding: string or sequence of ints
        :param weight_initializer: Initializer for the weights. Default is GlorotUniform.
        :type weight_initializer: ivy.Initializer, optional
        :param bias_initializer: Initializer for the bias. Default is Zeros.
        :type bias_initializer: ivy.Initializer, optional
        :param data_format: "NWC" or "NCW". Defaults to "NWC".
        :type data_format: string
        :param dilations: The dilation factor for each dimension of input.
        :type dilations: int or sequence of ints
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for the layer, as a container, constructed internally by default.
        :type v: ivy container of variables, optional
        """
        self._input_channels = input_channels
        self._output_channels = output_channels
        self._filter_size = filter_size
        self._strides = strides
        self._padding = padding
        self._w_shape = (filter_size, input_channels, output_channels) if data_format == 'NWC' \
            else (input_channels, output_channels, filter_size)
        self._b_shape = (1, 1, output_channels)
        self._w_init = weight_initializer
        self._b_init = bias_initializer
        self._data_format = data_format
        self._dilations = dilations
        Module.__init__(self, dev_str, v)

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        return {'w': self._w_init.create_variables(
                    self._w_shape, dev_str, self._output_channels, self._input_channels),
                'b': self._b_init.create_variables(self._b_shape, dev_str, self._output_channels)}

    def _forward(self, inputs):
        """
        Perform forward pass of the Conv1D layer.

        :param inputs: Inputs to process *[batch_size,w,d_in]*
        :type inputs: array
        :return: The outputs following the conv1d layer *[batch_size,new_w,d_out]*
        """
        return ivy.conv1d(inputs, self.v.w, self._strides, self._padding, self._data_format,
                          self._dilations) + self.v.b
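# Conv1D sketch (illustrative, same assumptions as the sketches above). With 'SAME' padding and
# stride 1, the width dimension is preserved:
#
#   conv = Conv1D(input_channels=3, output_channels=8, filter_size=5, strides=1, padding='SAME')
#   x = ivy.random_uniform(0., 1., (2, 100, 3))   # *[batch_size=2, w=100, d_in=3]*
#   y = conv(x)                                   # *[2, 100, 8]*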
class Conv1DTranspose(Module):

    def __init__(self, input_channels, output_channels, filter_size, strides, padding,
                 weight_initializer=GlorotUniform(), bias_initializer=Zeros(), output_shape=None, data_format='NWC',
                 dilations=1, dev_str='cpu', v=None):
        """
        1D transpose convolutional layer.

        :param input_channels: Number of input channels for the layer.
        :type input_channels: int
        :param output_channels: Number of output channels for the layer.
        :type output_channels: int
        :param filter_size: Size of the convolutional filter.
        :type filter_size: int
        :param strides: The stride of the sliding window for each dimension of input.
        :type strides: int or sequence of ints
        :param padding: "SAME" or "VALID" indicating the algorithm, or list indicating the per-dimension paddings.
        :type padding: string or sequence of ints
        :param weight_initializer: Initializer for the weights. Default is GlorotUniform.
        :type weight_initializer: ivy.Initializer, optional
        :param bias_initializer: Initializer for the bias. Default is Zeros.
        :type bias_initializer: ivy.Initializer, optional
        :param output_shape: Shape of the output, needed for the TensorFlow backend. Default is None.
        :type output_shape: sequence of ints, optional
        :param data_format: "NWC" or "NCW". Defaults to "NWC".
        :type data_format: string
        :param dilations: The dilation factor for each dimension of input.
        :type dilations: int or sequence of ints
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for the layer, as a container, constructed internally by default.
        :type v: ivy container of variables, optional
        """
        self._input_channels = input_channels
        self._output_channels = output_channels
        self._filter_size = filter_size
        self._strides = strides
        self._padding = padding
        self._w_shape = (filter_size, input_channels, output_channels) if data_format == 'NWC' \
            else (input_channels, output_channels, filter_size)
        self._b_shape = (1, 1, output_channels)
        self._w_init = weight_initializer
        self._b_init = bias_initializer
        self._output_shape = output_shape
        self._data_format = data_format
        self._dilations = dilations
        Module.__init__(self, dev_str, v)

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        return {'w': self._w_init.create_variables(
                    self._w_shape, dev_str, self._output_channels, self._input_channels),
                'b': self._b_init.create_variables(self._b_shape, dev_str, self._output_channels)}

    def _forward(self, inputs):
        """
        Perform forward pass of the Conv1DTranspose layer.

        :param inputs: Inputs to process *[batch_size,w,d_in]*
        :type inputs: array
        :return: The outputs following the conv1d transpose layer *[batch_size,new_w,d_out]*
        """
        return ivy.conv1d_transpose(inputs, self.v.w, self._strides, self._padding, self._output_shape,
                                    self._data_format, self._dilations) + self.v.b
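# Conv1DTranspose sketch (illustrative). A stride of 2 roughly doubles the width, and output_shape
# pins down the exact result, which the TensorFlow backend requires. The list format of
# output_shape, including the batch dimension, is an assumption here:
#
#   deconv = Conv1DTranspose(input_channels=8, output_channels=3, filter_size=5, strides=2,
#                            padding='SAME', output_shape=[2, 200, 3])
#   x = ivy.random_uniform(0., 1., (2, 100, 8))   # *[batch_size=2, w=100, d_in=8]*
#   y = deconv(x)                                 # *[2, 200, 3]*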
class Conv2D(Module):

    def __init__(self, input_channels, output_channels, filter_shape, strides, padding,
                 weight_initializer=GlorotUniform(), bias_initializer=Zeros(), data_format='NHWC', dilations=1,
                 dev_str='cpu', v=None):
        """
        2D convolutional layer.

        :param input_channels: Number of input channels for the layer.
        :type input_channels: int
        :param output_channels: Number of output channels for the layer.
        :type output_channels: int
        :param filter_shape: Shape of the convolutional filter.
        :type filter_shape: sequence of ints
        :param strides: The stride of the sliding window for each dimension of input.
        :type strides: int or sequence of ints
        :param padding: "SAME" or "VALID" indicating the algorithm, or list indicating the per-dimension paddings.
        :type padding: string or sequence of ints
        :param weight_initializer: Initializer for the weights. Default is GlorotUniform.
        :type weight_initializer: ivy.Initializer, optional
        :param bias_initializer: Initializer for the bias. Default is Zeros.
        :type bias_initializer: ivy.Initializer, optional
        :param data_format: "NHWC" or "NCHW". Defaults to "NHWC".
        :type data_format: string
        :param dilations: The dilation factor for each dimension of input.
        :type dilations: int or sequence of ints
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for the layer, as a container, constructed internally by default.
        :type v: ivy container of variables, optional
        """
        self._input_channels = input_channels
        self._output_channels = output_channels
        self._filter_shape = filter_shape
        self._strides = strides
        self._padding = padding
        self._w_shape = filter_shape + [input_channels, output_channels] if data_format == 'NHWC' \
            else [input_channels, output_channels] + filter_shape
        self._b_shape = (1, 1, 1, output_channels)
        self._w_init = weight_initializer
        self._b_init = bias_initializer
        self._data_format = data_format
        self._dilations = dilations
        Module.__init__(self, dev_str, v)

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        return {'w': self._w_init.create_variables(
                    self._w_shape, dev_str, self._output_channels, self._input_channels),
                'b': self._b_init.create_variables(self._b_shape, dev_str, self._output_channels)}

    def _forward(self, inputs):
        """
        Perform forward pass of the Conv2D layer.

        :param inputs: Inputs to process *[batch_size,h,w,d_in]*.
        :type inputs: array
        :return: The outputs following the conv2d layer *[batch_size,new_h,new_w,d_out]*
        """
        return ivy.conv2d(inputs, self.v.w, self._strides, self._padding, self._data_format,
                          self._dilations) + self.v.b
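# Conv2D sketch (illustrative). Note that filter_shape must be a list, since _create_variables
# concatenates it with [input_channels, output_channels] to form the weight shape:
#
#   conv = Conv2D(input_channels=3, output_channels=16, filter_shape=[3, 3], strides=1, padding='SAME')
#   x = ivy.random_uniform(0., 1., (2, 28, 28, 3))   # *[batch_size=2, h=28, w=28, d_in=3]*
#   y = conv(x)                                      # *[2, 28, 28, 16]*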
class Conv2DTranspose(Module):

    def __init__(self, input_channels, output_channels, filter_shape, strides, padding,
                 weight_initializer=GlorotUniform(), bias_initializer=Zeros(), output_shape=None, data_format='NHWC',
                 dilations=1, dev_str='cpu', v=None):
        """
        2D convolutional transpose layer.

        :param input_channels: Number of input channels for the layer.
        :type input_channels: int
        :param output_channels: Number of output channels for the layer.
        :type output_channels: int
        :param filter_shape: Shape of the convolutional filter.
        :type filter_shape: sequence of ints
        :param strides: The stride of the sliding window for each dimension of input.
        :type strides: int or sequence of ints
        :param padding: "SAME" or "VALID" indicating the algorithm, or list indicating the per-dimension paddings.
        :type padding: string or sequence of ints
        :param weight_initializer: Initializer for the weights. Default is GlorotUniform.
        :type weight_initializer: ivy.Initializer, optional
        :param bias_initializer: Initializer for the bias. Default is Zeros.
        :type bias_initializer: ivy.Initializer, optional
        :param output_shape: Shape of the output, needed for the TensorFlow backend. Default is None.
        :type output_shape: sequence of ints, optional
        :param data_format: "NHWC" or "NCHW". Defaults to "NHWC".
        :type data_format: string
        :param dilations: The dilation factor for each dimension of input.
        :type dilations: int or sequence of ints
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for the layer, as a container, constructed internally by default.
        :type v: ivy container of variables, optional
        """
        self._input_channels = input_channels
        self._output_channels = output_channels
        self._filter_shape = filter_shape
        self._strides = strides
        self._padding = padding
        self._w_shape = filter_shape + [input_channels, output_channels] if data_format == 'NHWC' \
            else [input_channels, output_channels] + filter_shape
        self._b_shape = (1, 1, 1, output_channels)
        self._w_init = weight_initializer
        self._b_init = bias_initializer
        self._output_shape = output_shape
        self._data_format = data_format
        self._dilations = dilations
        Module.__init__(self, dev_str, v)

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        return {'w': self._w_init.create_variables(
                    self._w_shape, dev_str, self._output_channels, self._input_channels),
                'b': self._b_init.create_variables(self._b_shape, dev_str, self._output_channels)}

    def _forward(self, inputs):
        """
        Perform forward pass of the Conv2DTranspose layer.

        :param inputs: Inputs to process *[batch_size,h,w,d_in]*.
        :type inputs: array
        :return: The outputs following the conv2d transpose layer *[batch_size,new_h,new_w,d_out]*
        """
        return ivy.conv2d_transpose(inputs, self.v.w, self._strides, self._padding, self._output_shape,
                                    self._data_format, self._dilations) + self.v.b
class DepthwiseConv2D(Module):

    def __init__(self, num_channels, filter_shape, strides, padding, weight_initializer=GlorotUniform(),
                 bias_initializer=Zeros(), data_format='NHWC', dilations=1, dev_str='cpu', v=None):
        """
        Depthwise 2D convolutional layer.

        :param num_channels: Number of input channels for the layer.
        :type num_channels: int
        :param filter_shape: Shape of the convolutional filter.
        :type filter_shape: sequence of ints
        :param strides: The stride of the sliding window for each dimension of input.
        :type strides: int or sequence of ints
        :param padding: "SAME" or "VALID" indicating the algorithm, or list indicating the per-dimension paddings.
        :type padding: string or sequence of ints
        :param weight_initializer: Initializer for the weights. Default is GlorotUniform.
        :type weight_initializer: ivy.Initializer, optional
        :param bias_initializer: Initializer for the bias. Default is Zeros.
        :type bias_initializer: ivy.Initializer, optional
        :param data_format: "NHWC" or "NCHW". Defaults to "NHWC".
        :type data_format: string
        :param dilations: The dilation factor for each dimension of input.
        :type dilations: int or sequence of ints
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for the layer, as a container, constructed internally by default.
        :type v: ivy container of variables, optional
        """
        self._num_channels = num_channels
        self._filter_shape = filter_shape
        self._strides = strides
        self._padding = padding
        self._w_shape = filter_shape + [num_channels] if data_format == 'NHWC' \
            else [num_channels] + filter_shape
        self._b_shape = (1, 1, num_channels)
        self._w_init = weight_initializer
        self._b_init = bias_initializer
        self._data_format = data_format
        self._dilations = dilations
        Module.__init__(self, dev_str, v)

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        return {'w': self._w_init.create_variables(
                    self._w_shape, dev_str, self._num_channels, self._num_channels),
                'b': self._b_init.create_variables(self._b_shape, dev_str, self._num_channels)}

    def _forward(self, inputs):
        """
        Perform forward pass of the DepthwiseConv2D layer.

        :param inputs: Inputs to process *[batch_size,h,w,d_in]*.
        :type inputs: array
        :return: The outputs following the depthwise conv2d layer *[batch_size,new_h,new_w,d_out]*
        """
        return ivy.depthwise_conv2d(inputs, self.v.w, self._strides, self._padding, self._data_format,
                                    self._dilations) + self.v.b
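# DepthwiseConv2D sketch (illustrative). Unlike Conv2D, each input channel is filtered
# independently, so the channel count is preserved and there is no mixing across channels:
#
#   dw = DepthwiseConv2D(num_channels=8, filter_shape=[3, 3], strides=1, padding='SAME')
#   x = ivy.random_uniform(0., 1., (2, 28, 28, 8))   # *[batch_size=2, h=28, w=28, d_in=8]*
#   y = dw(x)                                        # *[2, 28, 28, 8]*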
class Conv3D(Module):

    def __init__(self, input_channels, output_channels, filter_shape, strides, padding,
                 weight_initializer=GlorotUniform(), bias_initializer=Zeros(), data_format='NDHWC', dilations=1,
                 dev_str='cpu', v=None):
        """
        3D convolutional layer.

        :param input_channels: Number of input channels for the layer.
        :type input_channels: int
        :param output_channels: Number of output channels for the layer.
        :type output_channels: int
        :param filter_shape: Shape of the convolutional filter.
        :type filter_shape: sequence of ints
        :param strides: The stride of the sliding window for each dimension of input.
        :type strides: int or sequence of ints
        :param padding: "SAME" or "VALID" indicating the algorithm, or list indicating the per-dimension paddings.
        :type padding: string or sequence of ints
        :param weight_initializer: Initializer for the weights. Default is GlorotUniform.
        :type weight_initializer: ivy.Initializer, optional
        :param bias_initializer: Initializer for the bias. Default is Zeros.
        :type bias_initializer: ivy.Initializer, optional
        :param data_format: "NDHWC" or "NCDHW". Defaults to "NDHWC".
        :type data_format: string
        :param dilations: The dilation factor for each dimension of input.
        :type dilations: int or sequence of ints
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for the layer, as a container, constructed internally by default.
        :type v: ivy container of variables, optional
        """
        self._input_channels = input_channels
        self._output_channels = output_channels
        self._filter_shape = filter_shape
        self._strides = strides
        self._padding = padding
        self._w_shape = filter_shape + [input_channels, output_channels] if data_format == 'NDHWC' \
            else [input_channels, output_channels] + filter_shape
        self._b_shape = (1, 1, 1, 1, output_channels)
        self._w_init = weight_initializer
        self._b_init = bias_initializer
        self._data_format = data_format
        self._dilations = dilations
        Module.__init__(self, dev_str, v)

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        return {'w': self._w_init.create_variables(
                    self._w_shape, dev_str, self._output_channels, self._input_channels),
                'b': self._b_init.create_variables(self._b_shape, dev_str, self._output_channels)}

    def _forward(self, inputs):
        """
        Perform forward pass of the Conv3D layer.

        :param inputs: Inputs to process *[batch_size,d,h,w,d_in]*.
        :type inputs: array
        :return: The outputs following the conv3d layer *[batch_size,new_d,new_h,new_w,d_out]*
        """
        return ivy.conv3d(inputs, self.v.w, self._strides, self._padding, self._data_format,
                          self._dilations) + self.v.b
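# Conv3D sketch (illustrative). The only difference from Conv2D usage is the extra depth
# dimension in the 'NDHWC' layout:
#
#   conv = Conv3D(input_channels=1, output_channels=4, filter_shape=[3, 3, 3], strides=1, padding='SAME')
#   x = ivy.random_uniform(0., 1., (2, 16, 16, 16, 1))   # *[batch_size=2, d=16, h=16, w=16, d_in=1]*
#   y = conv(x)                                          # *[2, 16, 16, 16, 4]*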
class Conv3DTranspose(Module):

    def __init__(self, input_channels, output_channels, filter_shape, strides, padding,
                 weight_initializer=GlorotUniform(), bias_initializer=Zeros(), output_shape=None, data_format='NDHWC',
                 dilations=1, dev_str='cpu', v=None):
        """
        3D convolutional transpose layer.

        :param input_channels: Number of input channels for the layer.
        :type input_channels: int
        :param output_channels: Number of output channels for the layer.
        :type output_channels: int
        :param filter_shape: Shape of the convolutional filter.
        :type filter_shape: sequence of ints
        :param strides: The stride of the sliding window for each dimension of input.
        :type strides: int or sequence of ints
        :param padding: "SAME" or "VALID" indicating the algorithm, or list indicating the per-dimension paddings.
        :type padding: string or sequence of ints
        :param weight_initializer: Initializer for the weights. Default is GlorotUniform.
        :type weight_initializer: ivy.Initializer, optional
        :param bias_initializer: Initializer for the bias. Default is Zeros.
        :type bias_initializer: ivy.Initializer, optional
        :param output_shape: Shape of the output, needed for the TensorFlow backend. Default is None.
        :type output_shape: sequence of ints, optional
        :param data_format: "NDHWC" or "NCDHW". Defaults to "NDHWC".
        :type data_format: string
        :param dilations: The dilation factor for each dimension of input.
        :type dilations: int or sequence of ints
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for the layer, as a container, constructed internally by default.
        :type v: ivy container of variables, optional
        """
        self._input_channels = input_channels
        self._output_channels = output_channels
        self._filter_shape = filter_shape
        self._strides = strides
        self._padding = padding
        self._w_shape = filter_shape + [input_channels, output_channels] if data_format == 'NDHWC' \
            else [input_channels, output_channels] + filter_shape
        self._b_shape = (1, 1, 1, 1, output_channels)
        self._w_init = weight_initializer
        self._b_init = bias_initializer
        self._output_shape = output_shape
        self._data_format = data_format
        self._dilations = dilations
        Module.__init__(self, dev_str, v)

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        return {'w': self._w_init.create_variables(
                    self._w_shape, dev_str, self._output_channels, self._input_channels),
                'b': self._b_init.create_variables(self._b_shape, dev_str, self._output_channels)}

    def _forward(self, inputs):
        """
        Perform forward pass of the Conv3DTranspose layer.

        :param inputs: Inputs to process *[batch_size,d,h,w,d_in]*.
        :type inputs: array
        :return: The outputs following the conv3d transpose layer *[batch_size,new_d,new_h,new_w,d_out]*
        """
        return ivy.conv3d_transpose(inputs, self.v.w, self._strides, self._padding, self._output_shape,
                                    self._data_format, self._dilations) + self.v.b
# LSTM #
# -----#
class LSTM(Module):

    def __init__(self, input_channels, output_channels, weight_initializer=GlorotUniform(), num_layers=1,
                 return_sequence=True, return_state=True, dev_str='cpu', v=None):
        """
        LSTM layer, which is a set of stacked lstm cells.

        :param input_channels: Number of input channels for the layer
        :type input_channels: int
        :param output_channels: Number of output channels for the layer
        :type output_channels: int
        :param weight_initializer: Initializer for the weights. Default is GlorotUniform.
        :type weight_initializer: ivy.Initializer, optional
        :param num_layers: Number of lstm cells in the lstm layer, default is 1.
        :type num_layers: int, optional
        :param return_sequence: Whether or not to return the entire output sequence, or just the latest timestep.
                                Default is True.
        :type return_sequence: bool, optional
        :param return_state: Whether or not to return the latest hidden and cell states. Default is True.
        :type return_state: bool, optional
        :param dev_str: device on which to create the layer's variables 'cuda:0', 'cuda:1', 'cpu' etc. Default is cpu.
        :type dev_str: str, optional
        :param v: the variables for each of the lstm cells, as a container, constructed internally by default.
        :type v: ivy container of parameter arrays, optional
        """
        self._input_channels = input_channels
        self._output_channels = output_channels
        self._w_init = weight_initializer
        self._num_layers = num_layers
        self._return_sequence = return_sequence
        self._return_state = return_state
        Module.__init__(self, dev_str, v)
    # Public #
    def get_initial_state(self, batch_shape):
        """
        Get the initial state of the hidden and cell states, if not provided explicitly
        """
        batch_shape = list(batch_shape)
        return ([ivy.zeros(batch_shape + [self._output_channels]) for i in range(self._num_layers)],
                [ivy.zeros(batch_shape + [self._output_channels]) for i in range(self._num_layers)])
    # Overridden

    def _create_variables(self, dev_str):
        """
        Create internal variables for the layer
        """
        # the fan-in passed to the initializer matches the first dimension of each weight matrix:
        # input_channels only for the first layer's input weights, output_channels otherwise
        input_weights = dict(zip(
            ['layer_' + str(i) for i in range(self._num_layers)],
            [{'w': self._w_init.create_variables(
                (self._input_channels if i == 0 else self._output_channels, 4 * self._output_channels),
                dev_str, self._output_channels, self._input_channels if i == 0 else self._output_channels)}
                for i in range(self._num_layers)]))
        recurrent_weights = dict(zip(
            ['layer_' + str(i) for i in range(self._num_layers)],
            [{'w': self._w_init.create_variables(
                (self._output_channels, 4 * self._output_channels), dev_str, self._output_channels,
                self._output_channels)} for i in range(self._num_layers)]))
        return {'input': input_weights, 'recurrent': recurrent_weights}

    def _forward(self, inputs, initial_state=None):
        """
        Perform forward pass of the LSTM layer.

        :param inputs: Inputs to process *[batch_shape, t, in]*.
        :type inputs: array
        :param initial_state: 2-tuple of lists of the hidden states h and c for each layer,
                              each of dimension *[batch_shape,out]*. Created internally if None.
        :type initial_state: tuple of list of arrays, optional
        :return: The outputs of the final lstm layer *[batch_shape, t, out]* and the hidden state tuple of lists,
                 each of dimension *[batch_shape, out]*
        """
        if initial_state is None:
            initial_state = self.get_initial_state(inputs.shape[:-2])
        h_n_list = list()
        c_n_list = list()
        h_t = inputs
        for h_0, c_0, (_, lstm_input_var), (_, lstm_recurrent_var) in zip(
                initial_state[0], initial_state[1], self.v.input.items(), self.v.recurrent.items()):
            h_t, c_n = ivy.lstm_update(h_t, h_0, c_0, lstm_input_var.w, lstm_recurrent_var.w)
            h_n_list.append(h_t[..., -1, :])
            c_n_list.append(c_n)
        if not self._return_sequence:
            h_t = h_t[..., -1, :]
        if not self._return_state:
            return h_t
        return h_t, (h_n_list, c_n_list)
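# LSTM sketch (illustrative, same assumptions as the sketches above). With the default
# return_sequence=True and return_state=True, the layer returns the full output sequence plus
# the final hidden and cell states for every stacked cell:
#
#   lstm = LSTM(input_channels=4, output_channels=8, num_layers=2)
#   x = ivy.random_uniform(0., 1., (2, 10, 4))   # *[batch_shape=2, t=10, in=4]*
#   out, (h_n, c_n) = lstm(x)                    # out: *[2, 10, 8]*; h_n, c_n: one *[2, 8]* array per layer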