# MVP (aka TSBERT)

## Self-Supervised Pretraining of Time Series Models

Masked Value Predictor callback used to predict time series step values after a binary mask has been applied.

### self_mask

    self_mask (o)

### create_future_mask

    create_future_mask (o, r=0.15, sync=False)

### create_variable_mask

    create_variable_mask (o, r=0.15)

### create_subsequence_mask

    create_subsequence_mask (o, r=0.15, lm=3, stateful=True, sync=False)

All of the mask-creation functions return a boolean mask with the same shape as their input:

```python
import numpy as np
import torch
import matplotlib.pyplot as plt
from fastcore.test import test_eq
from tsai.callback.MVP import *  # mask utilities and the MVP callback

t = torch.rand(16, 3, 100)
mask = create_subsequence_mask(t, sync=False)
test_eq(mask.shape, t.shape)
mask = create_subsequence_mask(t, sync=True)
test_eq(mask.shape, t.shape)
mask = create_variable_mask(t)
test_eq(mask.shape, t.shape)
mask = create_future_mask(t)
test_eq(mask.shape, t.shape)
```

`self_mask` handles nan values in the input: its output contains no nans.

```python
o = torch.randn(2, 3, 4)
o[o > .5] = np.nan
test_eq(torch.isnan(self_mask(o)).sum(), 0)
```
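To make the pretraining objective concrete, here is a minimal, illustrative sketch of masked value prediction (not tsai's actual implementation; the MSE criterion and zero-filling are assumptions): values selected by a boolean mask are hidden from the model, and the reconstruction loss is computed only at those positions.

```python
# Illustrative sketch only; the MVP callback wires this into the fastai training loop.
t = torch.rand(8, 3, 50)                  # batch x variables x steps
mask = create_subsequence_mask(t, r=.15)  # True = position to mask
masked_input = t.masked_fill(mask, 0)     # hide masked values from the model
preds = masked_input                      # stand-in for the model's reconstruction
loss = ((preds - t)[mask] ** 2).mean()    # loss over masked positions only (MSE assumed)
```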
```python
t = torch.rand(16, 30, 100)
mask = create_subsequence_mask(t, r=.15) # default settings
test_eq(mask.dtype, torch.bool)
plt.figure(figsize=(10, 3))
plt.pcolormesh(mask[0], cmap='cool')
plt.title(f'sample 0 subsequence mask (sync=False) - default mean: {mask[0].float().mean().item():.3f}')
plt.show()
plt.figure(figsize=(10, 3))
plt.pcolormesh(mask[1], cmap='cool')
plt.title(f'sample 1 subsequence mask (sync=False) - default mean: {mask[1].float().mean().item():.3f}')
plt.show()
```
```python
t = torch.rand(16, 30, 100)
mask = create_subsequence_mask(t, r=.5) # 50% of values masked
test_eq(mask.dtype, torch.bool)
plt.figure(figsize=(10, 3))
plt.pcolormesh(mask[0], cmap='cool')
plt.title(f'sample 0 subsequence mask (r=.5) mean: {mask[0].float().mean().item():.3f}')
plt.show()
```
```python
t = torch.rand(16, 30, 100)
mask = create_subsequence_mask(t, lm=5) # average length of mask = 5
test_eq(mask.dtype, torch.bool)
plt.figure(figsize=(10, 3))
plt.pcolormesh(mask[0], cmap='cool')
plt.title(f'sample 0 subsequence mask (lm=5) mean: {mask[0].float().mean().item():.3f}')
plt.show()
```
```python
t = torch.rand(16, 30, 100)
mask = create_subsequence_mask(t, stateful=False) # individual time steps masked
test_eq(mask.dtype, torch.bool)
plt.figure(figsize=(10, 3))
plt.pcolormesh(mask[0], cmap='cool')
plt.title(f'per sample subsequence mask (stateful=False) mean: {mask[0].float().mean().item():.3f}')
plt.show()
```
```python
t = torch.rand(1, 30, 100)
mask = create_subsequence_mask(t, sync=True) # all time steps masked simultaneously
test_eq(mask.dtype, torch.bool)
plt.figure(figsize=(10, 3))
plt.pcolormesh(mask[0], cmap='cool')
plt.title(f'per sample subsequence mask (sync=True) mean: {mask[0].float().mean().item():.3f}')
plt.show()
```
```python
t = torch.rand(1, 30, 100)
mask = create_variable_mask(t) # masked variables
test_eq(mask.dtype, torch.bool)
plt.figure(figsize=(10, 3))
plt.pcolormesh(mask[0], cmap='cool')
plt.title(f'per sample variable mask mean: {mask[0].float().mean().item():.3f}')
plt.show()
```
```python
t = torch.rand(1, 30, 100)
mask = create_future_mask(t, r=.15, sync=True) # masked steps
test_eq(mask.dtype, torch.bool)
plt.figure(figsize=(10, 3))
plt.pcolormesh(mask[0], cmap='cool')
plt.title(f'future mask (sync=True) mean: {mask[0].float().mean().item():.3f}')
plt.show()
```
```python
t = torch.rand(1, 30, 100)
mask = create_future_mask(t, r=.15, sync=False) # masked steps
test_eq(mask.dtype, torch.bool)
plt.figure(figsize=(10, 3))
plt.pcolormesh(mask[0], cmap='cool')
plt.title(f'future mask (sync=False) mean: {mask[0].float().mean().item():.3f}')
plt.show()
```
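As a quick sanity check that a future mask hides trailing time steps (assumed here from the function's name and the plots above, not stated in the API docs), you can inspect the per-step masked fraction:

```python
# Assumption: a future mask hides the last portion of each sequence.
t = torch.rand(1, 30, 100)
mask = create_future_mask(t, r=.15, sync=True)
per_step = mask.float().mean((0, 1))  # masked fraction at each time step
print(per_step)  # expected ~0 early in the sequence and ~1 near the end
```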
### create_mask

    create_mask (o, r=0.15, lm=3, stateful=True, sync=False, subsequence_mask=True, variable_mask=False, future_mask=False)
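`create_mask` is a convenience wrapper that dispatches to one of the three mask functions above. A minimal sketch based on the signature (with the default flags it should behave like `create_subsequence_mask`):

```python
t = torch.rand(16, 3, 100)
mask = create_mask(t, r=.15, lm=3)  # default flags request a subsequence mask
test_eq(mask.shape, t.shape)
test_eq(mask.dtype, torch.bool)
```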
### MVP

    MVP (r:float=0.15, subsequence_mask:bool=True, lm:float=3.0, stateful:bool=True, sync:bool=False, variable_mask:bool=False, future_mask:bool=False, custom_mask:Optional=None, sel_vars:Optional[list]=None, nan_to_num:int=0, window_size:Optional[tuple]=None, dropout:float=0.1, crit:callable=None, weights_path:Optional[str]=None, target_dir:str='./models/MVP', fname:str='model', save_best:bool=True, verbose:bool=False)

Basic class handling tweaks of the training loop by changing a `Learner` in various events.
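For reference, here is a minimal, illustrative construction of the callback (parameter values are only examples, and `fname='pretrained'` is hypothetical; see the experiments below for real usage):

```python
mvp_cb = MVP(
    r=0.15,                     # fraction of values to mask
    lm=3,                       # average length of each masked subsequence
    stateful=True,              # mask contiguous subsequences rather than isolated steps
    sync=False,                 # mask each variable independently
    target_dir='./models/MVP',  # directory where weights are saved
    fname='pretrained',         # hypothetical file name for the saved weights
)
```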
## Experiments
```python
import os
from functools import partial
from fastai.metrics import accuracy
from tsai.data.external import get_UCR_data, check_data
from tsai.data.preprocessing import TSStandardize, TSNan2Value
from tsai.data.core import TSCategorize, get_ts_dls
from tsai.learner import ts_learner
from tsai.models.InceptionTimePlus import InceptionTimePlus
```
```python
dsid = 'MoteStrain'
X, y, splits = get_UCR_data(dsid, split_data=False)
check_data(X, y, splits, False)
X[X < -1] = np.nan # This is to test the model works well even if nan values are passed through the dataloaders.
```

```
X      - shape: [1272 samples x 1 features x 84 timesteps]  type: memmap  dtype: float32  isnan: 0
y      - shape: (1272,)  type: memmap  dtype: <U1  n_classes: 2 (636 samples per class) ['1', '2']  isnan: False
splits - n_splits: 2  shape: [20, 1252]  overlap: False
```
```python
# Pre-train
tfms = [None, [TSCategorize()]]
batch_tfms = [TSStandardize(by_var=True)]
unlabeled_dls = get_ts_dls(X, splits=splits, tfms=tfms, batch_tfms=batch_tfms)
learn = ts_learner(unlabeled_dls, InceptionTimePlus, cbs=[MVP(fname=f'{dsid}', window_size=(.5, 1))]) # trained on variable window size
learn.fit_one_cycle(1, 3e-3)
```

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 1.270972 | 1.194974 | 00:06 |
```python
learn = ts_learner(unlabeled_dls, InceptionTimePlus, cbs=[MVP(weights_path=f'models/MVP/{dsid}.pth')])
learn.fit_one_cycle(1, 3e-3)
```

| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 0.837741 | 1.200484 | 00:07 |
```python
learn.MVP.show_preds(sharey=True) # these preds are highly inaccurate as the model's been trained for just 1 epoch for testing purposes
```
```python
# Fine-tune
tfms = [None, [TSCategorize()]]
batch_tfms = [TSStandardize(by_var=True), TSNan2Value()]
labeled_dls = get_ts_dls(X, y, splits=splits, tfms=tfms, batch_tfms=batch_tfms, bs=64)
learn = ts_learner(labeled_dls, InceptionTimePlus, pretrained=True, weights_path=f'models/MVP/{dsid}.pth', metrics=accuracy)
learn.fit_one_cycle(1)
```

| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 0.773015 | 0.744267 | 0.460863 | 00:09 |
If several mask types are requested at once, MVP uses only one of them and raises a warning:

```python
tfms = [None, [TSCategorize()]]
batch_tfms = [TSStandardize(by_var=True), TSNan2Value()]
unlabeled_dls = get_ts_dls(X, splits=splits, tfms=tfms, batch_tfms=batch_tfms, bs=64)
fname = f'{dsid}_test'
mvp = MVP(subsequence_mask=True, sync='random', variable_mask=True, future_mask=True, fname=fname)
learn = ts_learner(unlabeled_dls, InceptionTimePlus, metrics=accuracy, cbs=mvp) # Metrics will not be used!
```

```
UserWarning: Only future_mask will be used
```
A `custom_mask` takes precedence over all built-in mask types:

```python
tfms = [None, [TSCategorize()]]
batch_tfms = [TSStandardize(by_var=True)]
unlabeled_dls = get_ts_dls(X, splits=splits, tfms=tfms, batch_tfms=batch_tfms, bs=64)
fname = f'{dsid}_test'
mvp = MVP(subsequence_mask=True, sync='random', variable_mask=True, future_mask=True, custom_mask=partial(create_future_mask, r=.15), fname=fname)
learn = ts_learner(unlabeled_dls, InceptionTimePlus, metrics=accuracy, cbs=mvp) # Metrics will not be used!
```

```
UserWarning: Only custom_mask will be used
```
```python
# Clean up the weights saved during the tests above
try: os.remove("models/MVP/MoteStrain.pth")
except OSError: pass
try: os.remove("models/MVP/model.pth")
except OSError: pass
```