bs = 16
c_in = 3
seq_len = 12
c_out = 2
xb = torch.rand(bs, c_in, seq_len)
test_eq(RNNPlus(c_in, c_out)(xb).shape, [bs, c_out])
test_eq(RNNPlus(c_in, c_out, hidden_size=100, n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True, fc_dropout=0.5)(xb).shape,
        [bs, c_out])
test_eq(RNNPlus(c_in, c_out, hidden_size=[100, 50, 10], bias=True, rnn_dropout=0.2, bidirectional=True, fc_dropout=0.5)(xb).shape,
        [bs, c_out])
test_eq(RNNPlus(c_in, c_out, hidden_size=[100], n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True, fc_dropout=0.5)(xb).shape,
        [bs, c_out])
test_eq(LSTMPlus(c_in, c_out, hidden_size=100, n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True, fc_dropout=0.5)(xb).shape,
        [bs, c_out])
test_eq(GRUPlus(c_in, c_out, hidden_size=100, n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True, fc_dropout=0.5)(xb).shape,
        [bs, c_out])
test_eq(RNNPlus(c_in, c_out, seq_len, last_step=False)(xb).shape, [bs, c_out])
test_eq(RNNPlus(c_in, c_out, seq_len, last_step=False)(xb).shape, [bs, c_out])
test_eq(RNNPlus(c_in, c_out, seq_len, hidden_size=100, n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True, fc_dropout=0.5,
                last_step=False)(xb).shape,
        [bs, c_out])
test_eq(LSTMPlus(c_in, c_out, seq_len, last_step=False)(xb).shape, [bs, c_out])
test_eq(GRUPlus(c_in, c_out, seq_len, last_step=False)(xb).shape, [bs, c_out])
RNNPlus
These are RNN, LSTM and GRU PyTorch implementations created by Ignacio Oguiza - oguiza@timeseriesAI.co
The idea of adding a feature extractor to the RNN network comes from the solution developed by the UPSTAGE team (https://www.kaggle.com/songwonho, https://www.kaggle.com/limerobot and https://www.kaggle.com/jungikhyo). They finished in 3rd position in Kaggle’s Google Brain - Ventilator Pressure Prediction competition. They used a Conv1d + Stacked LSTM architecture.
GRUPlus
GRUPlus (c_in, c_out, seq_len=None, hidden_size=[100], n_layers=1, bias=True, rnn_dropout=0, bidirectional=False, n_cat_embeds=None, cat_embed_dims=None, cat_padding_idxs=None, cat_pos=None, feature_extractor=None, fc_dropout=0.0, last_step=True, bn=False, custom_head=None, y_range=None, init_weights=True, **kwargs)
A sequential container.
Modules will be added to it in the order they are passed in the constructor. Alternatively, an `OrderedDict` of modules can be passed in. The `forward()` method of [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) accepts any input and forwards it to the first module it contains. It then “chains” outputs to inputs sequentially for each subsequent module, finally returning the output of the last module.
The value a [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) provides over manually calling a sequence of modules is that it allows treating the whole container as a single module, such that performing a transformation on the [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) applies to each of the modules it stores (which are each a registered submodule of the [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential)).
What’s the difference between a [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) and a `torch.nn.ModuleList`? A `ModuleList` is exactly what it sounds like – a list for storing `Module`s! On the other hand, the layers in a [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) are connected in a cascading way.
Example::
# Using Sequential to create a small model. When `model` is run,
# input will first be passed to `Conv2d(1,20,5)`. The output of
# `Conv2d(1,20,5)` will be used as the input to the first
# `ReLU`; the output of the first `ReLU` will become the input
# for `Conv2d(20,64,5)`. Finally, the output of
# `Conv2d(20,64,5)` will be used as input to the second `ReLU`
model = nn.Sequential(
nn.Conv2d(1,20,5),
nn.ReLU(),
nn.Conv2d(20,64,5),
nn.ReLU()
)
# Using Sequential with OrderedDict. This is functionally the
# same as the above code
model = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(1,20,5)),
('relu1', nn.ReLU()),
('conv2', nn.Conv2d(20,64,5)),
('relu2', nn.ReLU())
]))
LSTMPlus
LSTMPlus (c_in, c_out, seq_len=None, hidden_size=[100], n_layers=1, bias=True, rnn_dropout=0, bidirectional=False, n_cat_embeds=None, cat_embed_dims=None, cat_padding_idxs=None, cat_pos=None, feature_extractor=None, fc_dropout=0.0, last_step=True, bn=False, custom_head=None, y_range=None, init_weights=True, **kwargs)
A sequential container.
Modules will be added to it in the order they are passed in the constructor. Alternatively, an `OrderedDict` of modules can be passed in. The `forward()` method of [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) accepts any input and forwards it to the first module it contains. It then “chains” outputs to inputs sequentially for each subsequent module, finally returning the output of the last module.
The value a [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) provides over manually calling a sequence of modules is that it allows treating the whole container as a single module, such that performing a transformation on the [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) applies to each of the modules it stores (which are each a registered submodule of the [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential)).
What’s the difference between a [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) and a `torch.nn.ModuleList`? A `ModuleList` is exactly what it sounds like – a list for storing `Module`s! On the other hand, the layers in a [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) are connected in a cascading way.
Example::
# Using Sequential to create a small model. When `model` is run,
# input will first be passed to `Conv2d(1,20,5)`. The output of
# `Conv2d(1,20,5)` will be used as the input to the first
# `ReLU`; the output of the first `ReLU` will become the input
# for `Conv2d(20,64,5)`. Finally, the output of
# `Conv2d(20,64,5)` will be used as input to the second `ReLU`
model = nn.Sequential(
nn.Conv2d(1,20,5),
nn.ReLU(),
nn.Conv2d(20,64,5),
nn.ReLU()
)
# Using Sequential with OrderedDict. This is functionally the
# same as the above code
model = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(1,20,5)),
('relu1', nn.ReLU()),
('conv2', nn.Conv2d(20,64,5)),
('relu2', nn.ReLU())
]))
RNNPlus
RNNPlus (c_in, c_out, seq_len=None, hidden_size=[100], n_layers=1, bias=True, rnn_dropout=0, bidirectional=False, n_cat_embeds=None, cat_embed_dims=None, cat_padding_idxs=None, cat_pos=None, feature_extractor=None, fc_dropout=0.0, last_step=True, bn=False, custom_head=None, y_range=None, init_weights=True, **kwargs)
A sequential container.
Modules will be added to it in the order they are passed in the constructor. Alternatively, an `OrderedDict` of modules can be passed in. The `forward()` method of [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) accepts any input and forwards it to the first module it contains. It then “chains” outputs to inputs sequentially for each subsequent module, finally returning the output of the last module.
The value a [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) provides over manually calling a sequence of modules is that it allows treating the whole container as a single module, such that performing a transformation on the [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) applies to each of the modules it stores (which are each a registered submodule of the [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential)).
What’s the difference between a [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) and a `torch.nn.ModuleList`? A `ModuleList` is exactly what it sounds like – a list for storing `Module`s! On the other hand, the layers in a [Sequential](https://timeseriesAI.github.io/models.layers.html#sequential) are connected in a cascading way.
Example::
# Using Sequential to create a small model. When `model` is run,
# input will first be passed to `Conv2d(1,20,5)`. The output of
# `Conv2d(1,20,5)` will be used as the input to the first
# `ReLU`; the output of the first `ReLU` will become the input
# for `Conv2d(20,64,5)`. Finally, the output of
# `Conv2d(20,64,5)` will be used as input to the second `ReLU`
model = nn.Sequential(
nn.Conv2d(1,20,5),
nn.ReLU(),
nn.Conv2d(20,64,5),
nn.ReLU()
)
# Using Sequential with OrderedDict. This is functionally the
# same as the above code
model = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(1,20,5)),
('relu1', nn.ReLU()),
('conv2', nn.Conv2d(20,64,5)),
('relu2', nn.ReLU())
]))
feature_extractor = MultiConv1d(c_in, kss=[1,3,5,7])
custom_head = nn.Sequential(Transpose(1,2), nn.Linear(8,8), nn.SELU(), nn.Linear(8, 1), Squeeze())
test_eq(LSTMPlus(c_in, c_out, seq_len, hidden_size=[32,16,8,4], bidirectional=True,
                 feature_extractor=feature_extractor, custom_head=custom_head)(xb).shape, [bs, seq_len])
feature_extractor = MultiConv1d(c_in, kss=[1,3,5,7], keep_original=True)
custom_head = nn.Sequential(Transpose(1,2), nn.Linear(8,8), nn.SELU(), nn.Linear(8, 1), Squeeze())
test_eq(LSTMPlus(c_in, c_out, seq_len, hidden_size=[32,16,8,4], bidirectional=True,
                 feature_extractor=feature_extractor, custom_head=custom_head)(xb).shape, [bs, seq_len])
[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
bs = 16
c_in = 3
seq_len = 12
c_out = 2
x1 = torch.rand(bs,1,seq_len)
x2 = torch.randint(0,3,(bs,1,seq_len))
x3 = torch.randint(0,5,(bs,1,seq_len))
xb = torch.cat([x1,x2,x3],1)
custom_head = partial(create_mlp_head, fc_dropout=0.5)
test_eq(LSTMPlus(c_in, c_out, seq_len, last_step=False, custom_head=custom_head)(xb).shape, [bs, c_out])
custom_head = partial(create_pool_head, concat_pool=True, fc_dropout=0.5)
test_eq(LSTMPlus(c_in, c_out, seq_len, last_step=False, custom_head=custom_head)(xb).shape, [bs, c_out])
custom_head = partial(create_pool_plus_head, fc_dropout=0.5)
test_eq(LSTMPlus(c_in, c_out, seq_len, last_step=False, custom_head=custom_head)(xb).shape, [bs, c_out])
custom_head = partial(create_conv_head)
test_eq(LSTMPlus(c_in, c_out, seq_len, last_step=False, custom_head=custom_head)(xb).shape, [bs, c_out])
test_eq(LSTMPlus(c_in, c_out, seq_len, hidden_size=[100, 50], n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True)(xb).shape,
        [bs, c_out])
n_cat_embeds = [3, 5]
cat_pos = [1, 2]
custom_head = partial(create_conv_head)
m = LSTMPlus(c_in, c_out, seq_len, hidden_size=[100, 50], n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True,
             n_cat_embeds=n_cat_embeds, cat_pos=cat_pos)
test_eq(m(xb).shape, [bs, c_out])
from tsai.data.all import *
from tsai.models.utils import *
dsid = 'NATOPS'
bs = 16
X, y, splits = get_UCR_data(dsid, return_split=False)
tfms = [None, [Categorize()]]
dls = get_ts_dls(X, y, tfms=tfms, splits=splits, bs=bs)
model = build_ts_model(LSTMPlus, dls=dls)
print(model[-1])
learn = Learner(dls, model, metrics=accuracy)
learn.fit_one_cycle(1, 3e-3)
Sequential(
(0): LastStep()
(1): Linear(in_features=100, out_features=6, bias=True)
)
model = LSTMPlus(dls.vars, dls.c, dls.len, last_step=False)
learn = Learner(dls, model, metrics=accuracy)
learn.fit_one_cycle(1, 3e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|
custom_head = partial(create_pool_head, concat_pool=True)
model = LSTMPlus(dls.vars, dls.c, dls.len, last_step=False, custom_head=custom_head)
learn = Learner(dls, model, metrics=accuracy)
learn.fit_one_cycle(1, 3e-3)
custom_head = partial(create_pool_plus_head, concat_pool=True)
model = LSTMPlus(dls.vars, dls.c, dls.len, last_step=False, custom_head=custom_head)
learn = Learner(dls, model, metrics=accuracy)
learn.fit_one_cycle(1, 3e-3)
m = RNNPlus(c_in, c_out, seq_len, hidden_size=100,n_layers=2,bidirectional=True,rnn_dropout=.5,fc_dropout=.5)
print(m)
print(count_parameters(m))
m(xb).shape
RNNPlus(
(backbone): _RNN_Backbone(
(to_cat_embed): Identity()
(feature_extractor): Identity()
(rnn): Sequential(
(0): RNN(3, 100, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
(1): LSTMOutput()
)
(transpose): Transpose(dims=-1, -2).contiguous()
)
(head): Sequential(
(0): LastStep()
(1): Dropout(p=0.5, inplace=False)
(2): Linear(in_features=200, out_features=2, bias=True)
)
)
81802
torch.Size([16, 2])
m = LSTMPlus(c_in, c_out, seq_len, hidden_size=100,n_layers=2,bidirectional=True,rnn_dropout=.5,fc_dropout=.5)
print(m)
print(count_parameters(m))
m(xb).shape
LSTMPlus(
(backbone): _RNN_Backbone(
(to_cat_embed): Identity()
(feature_extractor): Identity()
(rnn): Sequential(
(0): LSTM(3, 100, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
(1): LSTMOutput()
)
(transpose): Transpose(dims=-1, -2).contiguous()
)
(head): Sequential(
(0): LastStep()
(1): Dropout(p=0.5, inplace=False)
(2): Linear(in_features=200, out_features=2, bias=True)
)
)
326002
torch.Size([16, 2])
m = GRUPlus(c_in, c_out, seq_len, hidden_size=100,n_layers=2,bidirectional=True,rnn_dropout=.5,fc_dropout=.5)
print(m)
print(count_parameters(m))
m(xb).shape
GRUPlus(
(backbone): _RNN_Backbone(
(to_cat_embed): Identity()
(feature_extractor): Identity()
(rnn): Sequential(
(0): GRU(3, 100, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
(1): LSTMOutput()
)
(transpose): Transpose(dims=-1, -2).contiguous()
)
(head): Sequential(
(0): LastStep()
(1): Dropout(p=0.5, inplace=False)
(2): Linear(in_features=200, out_features=2, bias=True)
)
)
244602
torch.Size([16, 2])
Converting a model to TorchScript
model = GRUPlus(c_in, c_out, hidden_size=100, n_layers=2, bidirectional=True, rnn_dropout=.5, fc_dropout=.5)
model.eval()
inp = torch.rand(1, c_in, 50)
output = model(inp)
print(output)
tensor([[-0.0677, -0.0857]], grad_fn=<AddmmBackward0>)
Tracing
# save to gpu, cpu or both
traced_cpu = torch.jit.trace(model.cpu(), inp)
print(traced_cpu)
torch.jit.save(traced_cpu, "cpu.pt")
# load cpu or gpu model
traced_cpu = torch.jit.load("cpu.pt")
test_eq(traced_cpu(inp), output)
!rm "cpu.pt"
GRUPlus(
original_name=GRUPlus
(backbone): _RNN_Backbone(
original_name=_RNN_Backbone
(to_cat_embed): Identity(original_name=Identity)
(feature_extractor): Identity(original_name=Identity)
(rnn): Sequential(
original_name=Sequential
(0): GRU(original_name=GRU)
(1): LSTMOutput(original_name=LSTMOutput)
)
(transpose): Transpose(original_name=Transpose)
)
(head): Sequential(
original_name=Sequential
(0): LastStep(original_name=LastStep)
(1): Dropout(original_name=Dropout)
(2): Linear(original_name=Linear)
)
)
Converting a model to ONNX
import onnx
torch.onnx.export(model.cpu(),               # model being run
                  inp,                       # model input (or a tuple for multiple inputs)
                  "cpu.onnx",                # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  verbose=False,
                  opset_version=13,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={
                      'input' : {0 : 'batch_size'},
                      'output' : {0 : 'batch_size'}} # variable length axes
                  )
# Load the model and check it's ok
onnx_model = onnx.load("cpu.onnx")
onnx.checker.check_model(onnx_model)
import onnxruntime as ort
ort_sess = ort.InferenceSession('cpu.onnx')
out = ort_sess.run(None, {'input': inp.numpy()})
input_name = ort_sess.get_inputs()[0].name
output_name = ort_sess.get_outputs()[0].name
input_dims = ort_sess.get_inputs()[0].shape
test_close(out, output.detach().numpy())
!rm "cpu.onnx"