from tsai.data.external import get_UCR_data
Data Core
Main NumPy and time series functions used throughout the library.
dsid = 'OliveOil'
X_train, y_train, X_valid, y_valid = get_UCR_data(dsid, on_disk=True, force_download=True)
X_on_disk, y_on_disk, splits = get_UCR_data(dsid, on_disk=True, return_split=False, force_download=True)
X_in_memory, y_in_memory, splits = get_UCR_data(dsid, on_disk=False, return_split=False, force_download=True)
y_tensor = cat2int(y_on_disk)
y_array = y_tensor.numpy()
ToNumpyTensor
ToNumpyTensor (enc=None, dec=None, split_idx=None, order=None)
Transforms an object into NumpyTensor
NumpyTensor
NumpyTensor (o, dtype=None, device=None, copy=None, requires_grad=False, **kwargs)
Returns a `tensor` with subclass `NumpyTensor` that has a show method
TSTensor
TSTensor (o, dtype=None, device=None, copy=None, requires_grad=False, **kwargs)
Returns a `tensor` with subclass `TSTensor` that has a show method
show_tuple
show_tuple (tup, nrows:int=1, ncols:int=1, sharex:"bool|Literal['none','all','row','col']"=False, sharey:"bool|Literal['none','all','row','col']"=False, squeeze:bool=True, width_ratios:Sequence[float]|None=None, height_ratios:Sequence[float]|None=None, subplot_kw:dict[str,Any]|None=None, gridspec_kw:dict[str,Any]|None=None)
Display a timeseries plot from a decoded tuple
ToTSTensor
ToTSTensor (enc=None, dec=None, split_idx=None, order=None)
Transforms an object into TSTensor
a = np.random.randn(2, 3, 4).astype(np.float16)
assert np.shares_memory(a, NumpyTensor(a))
assert np.shares_memory(a, TSTensor(a))

a = np.random.randn(2, 3, 4).astype(np.float32)
assert np.shares_memory(a, NumpyTensor(a))
assert np.shares_memory(a, TSTensor(a))

a = np.random.randint(10, size=10).astype(np.int64)
assert np.shares_memory(a, NumpyTensor(a))
assert np.shares_memory(a, TSTensor(a))

a = np.random.randint(10, size=10).astype(np.int32)
assert np.shares_memory(a, NumpyTensor(a))
assert np.shares_memory(a, TSTensor(a))

a = torch.rand(2, 3, 4).float()
assert np.shares_memory(a, NumpyTensor(a))
assert np.shares_memory(a, TSTensor(a))

a = torch.randint(3, (10,))
assert np.shares_memory(a, NumpyTensor(a))
assert np.shares_memory(a, TSTensor(a))
t = TSTensor(torch.randn(2, 3, 4))
p = torch.tensor(3., requires_grad=True)
test = torch.add(t, p)
test_eq(test.requires_grad, True)
test_eq(type(t.data), torch.Tensor)
test_eq(type(t), TSTensor)
l = L([0,1,2,3], [4,5,6,7], [8, 9, 10, 11])
TSTensor(l), TSTensor(l).data
(TSTensor(vars:3, len:4, device=cpu, dtype=torch.int64),
tensor([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]]))
t = TSTensor(X_train)
for i in range(4):
    print(t, t.ndim, torch.is_tensor(t))
    if i < 3: t = t[0]
TSTensor(samples:30, vars:1, len:570, device=cpu, dtype=torch.float32) 3 True
TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32) 2 True
TSTensor(len:570, device=cpu, dtype=torch.float32) 1 True
TSTensor([-0.6113752722740173], device=cpu, dtype=torch.float32) 0 True
TSTensor(X_on_disk)
TSTensor(samples:60, vars:1, len:570, device=cpu, dtype=torch.float32)
ToTSTensor()(X_on_disk)
TSTensor(samples:60, vars:1, len:570, device=cpu, dtype=torch.float32)
TSTensor(X_train).show();
TSTensor(X_train).show(title='1');
show_tuple((TSTensor(X_train), ['1', '2']))
show_tuple((TSTensor(np.arange(10).reshape(2,5)), 1))
show_tuple((TSTensor(np.arange(10).reshape(2,5)), '1'))
show_tuple((TSTensor(np.arange(10).reshape(2,5)), [1,2]))
show_tuple((TSTensor(np.arange(10).reshape(2,5)), ['1', '2']))
TSMaskTensor
TSMaskTensor (o, dtype=None, device=None, copy=None, requires_grad=False, **kwargs)
Returns a `tensor` with subclass `NumpyTensor` that has a show method
TSLabelTensor
TSLabelTensor (o, dtype=None, device=None, copy=None, requires_grad=False, **kwargs)
Returns a `tensor` with subclass `NumpyTensor` that has a show method
t = TSLabelTensor(torch.randint(0,10,(1, 2, 3)))
t, t[0], t[0][0], t[0][0][0]
(TSLabelTensor(shape:(1, 2, 3), device=cpu, dtype=torch.int64),
TSLabelTensor(shape:(2, 3), device=cpu, dtype=torch.int64),
TSLabelTensor(shape:(3,), device=cpu, dtype=torch.int64),
7)
t = TSMaskTensor(torch.randint(0,10,(1, 2, 3)))
t, t[0], t[0][0], t[0][0][0]
(TSMaskTensor(shape:(1, 2, 3), device=cpu, dtype=torch.int64),
TSMaskTensor(shape:(2, 3), device=cpu, dtype=torch.int64),
TSMaskTensor(shape:(3,), device=cpu, dtype=torch.int64),
1)
TSClassification
TSClassification (vocab=None, sort=True)
Vectorized, reversible transform of category string to `vocab` id
ToInt
ToInt (enc=None, dec=None, split_idx=None, order=None)
Transforms an object dtype to int
ToFloat
ToFloat (enc=None, dec=None, split_idx=None, order=None)
Transforms an object dtype to float (vectorized)
a = np.random.randint(0, 2, 10)
b = np.array(['1', '2', '3'])
c = np.array(['1.0', '2.0', '3.0'])
t = torch.randint(0, 2, (10, ))
test_eq(ToFloat()(a).dtype, 'float32')
test_eq(ToFloat()(b).dtype, 'float32')
test_eq(ToFloat()(c).dtype, 'float32')
test_eq(ToFloat()(t).dtype, torch.float32)
a = np.random.rand(10)*10
b = np.array(['1.0', '2.0', '3.0'])
t = torch.rand(10)*10
test_eq(ToInt()(a).dtype, 'int64')
test_eq(ToInt()(b).dtype, 'int64')
test_eq(ToInt()(t).dtype, torch.long)
t = TSClassification()
t.setup(y_on_disk[splits[0]])
y_encoded = t(y_on_disk)
print(y_encoded)
test_eq(t.decodes(y_encoded), y_on_disk)
TensorCategory([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])
y_multi = np.random.randint(0,3,20)
y_multi = np.asarray(alphabet[y_multi]).reshape(4,5)
tfm = TSClassification()
tfm.setup(y_multi)
enc_y_multi = tfm(y_multi)
test_eq(y_multi, tfm.decode(enc_y_multi))
enc_y_multi
TensorCategory([[0, 1, 1, 1, 2],
[0, 1, 2, 1, 0],
[2, 1, 0, 1, 2],
[0, 2, 0, 2, 2]])
TSMultiLabelClassification
TSMultiLabelClassification (c=None, vocab=None, add_na=False, sort=True)
Reversible combined transform of multi-category strings to one-hot encoded `vocab` id
TSTensorBlock
TSTensorBlock (type_tfms=None, item_tfms=None, batch_tfms=None, dl_type=None, dls_kwargs=None)
Initialize self. See help(type(self)) for accurate signature.
NumpyTensorBlock
NumpyTensorBlock (type_tfms=None, item_tfms=None, batch_tfms=None, dl_type=None, dls_kwargs=None)
Initialize self. See help(type(self)) for accurate signature.
test_eq(NumpyTensorBlock().item_tfms[0].__name__, 'ToNumpyTensor')
test_eq(TSTensorBlock().item_tfms[0].__name__, 'ToTSTensor')
TSDataset
TSDataset (X, y=None, split=None, sel_vars=None, sel_steps=None, types=None, dtype=None, device=None)
Initialize self. See help(type(self)) for accurate signature.
NumpyDataset
NumpyDataset (X, y=None, types=None)
Initialize self. See help(type(self)) for accurate signature.
TorchDataset
TorchDataset (X, y=None)
Initialize self. See help(type(self)) for accurate signature.
a = np.random.rand(5,6,7)
b = np.random.rand(5)
ds = NumpyDataset(a,b)
xb, yb = ds[[0,4]]
test_eq(xb.shape, (2,6,7))
test_eq(yb.shape, (2,))
TSTfmdLists
TSTfmdLists (items=None, *rest, use_list=False, match=None)
A `Pipeline` of `tfms` applied to a collection of `items`
| | Type | Default | Details |
|---|---|---|---|
| items | list | | Items to apply `Transform`s to |
| use_list | bool | None | Use `list` in `L` |
NoTfmLists
NoTfmLists (items=None, *rest, use_list=False, match=None)
A `Pipeline` of `tfms` applied to a collection of `items`
items = X_on_disk
tl = TfmdLists(items, tfms=None, splits=splits)
test_eq(len(tl), len(X_on_disk))
test_eq(len(tl.train), len(splits[0]))
test_eq(len(tl.valid), len(splits[1]))
test_eq(tl[[0,4,7]], X_on_disk[[0,4,7]])
test_eq(tl.train[[0,4,7]], X_on_disk[splits[0][0,4,7]])
test_eq(tl.valid[[0,4,7]], X_on_disk[splits[1][0,4,7]])
test_eq(tl[0], items[0])
test_eq(tl[[0,1]], items[[0,1]])
test_eq(tl.decode(tl[0]), tl[0])
test_eq((tl.split_idx, tl.train.split_idx, tl.valid.split_idx), (None, 0, 1))
items = X_on_disk
tl = TSTfmdLists(items, tfms=None, splits=splits)
test_eq(len(tl), len(X_on_disk))
test_eq(len(tl.train), len(splits[0]))
test_eq(len(tl.valid), len(splits[1]))
test_eq(tl[[0,4,7]], X_on_disk[[0,4,7]])
test_eq(tl.train[[0,4,7]], X_on_disk[splits[0][0,4,7]])
test_eq(tl.valid[[0,4,7]], X_on_disk[splits[1][0,4,7]])
test_eq(tl[0], items[0])
test_eq(tl[[0,1]], items[[0,1]])
test_eq(tl.decode(tl[0]), tl[0])
test_eq((tl.split_idx, tl.train.split_idx, tl.valid.split_idx), (None, 0, 1))
items = X_on_disk
ntl = NoTfmLists(items, splits=splits)
test_eq(len(ntl), len(X_on_disk))
test_eq(len(ntl.train), len(splits[0]))
test_eq(len(ntl.valid), len(splits[1]))
test_eq(ntl._splits, np.arange(len(X_on_disk)))
test_eq(ntl.train._splits, np.arange(len(splits[0])))
test_eq(ntl.valid._splits, np.arange(len(splits[0]), len(X_on_disk)))
print(ntl)
print(ntl.train)
print(ntl.valid)
test_eq(ntl[[0,4,7]], X_on_disk[[0,4,7]])
test_eq(ntl.train[[0,4,7]], X_on_disk[splits[0][0,4,7]])
test_eq(ntl.valid[[0,4,7]], X_on_disk[splits[1][0,4,7]])
test_eq(ntl[0], items[0])
test_eq(ntl[[0,1]], items[[0,1]])

test_eq(ntl[:], X_on_disk)
ntl[0].shape, stack(ntl[[0,1]]).shape
test_eq(ntl.decode(ntl[0]), ntl[0])
assert id(items) == id(ntl.items) == id(ntl.train.items) == id(ntl.valid.items)
test_eq((ntl.split_idx, ntl.train.split_idx, ntl.valid.split_idx), (None, 0, 1))
NoTfmLists: memmap(60, 1, 570)
NoTfmLists: memmap(30, 1, 570)
NoTfmLists: memmap(30, 1, 570)
subitems = X_on_disk
new_ntl = ntl._new(X_on_disk)
test_eq(new_ntl[:], X_on_disk)

idxs = random_choice(len(X_on_disk), 10, False)
new_ntl = ntl._new(X_on_disk[idxs])
test_eq(new_ntl[:], X_on_disk[idxs])

idxs = random_choice(len(X_on_disk), 10, False)
new_ntl = ntl.valid._new(X_on_disk[idxs])
test_eq(new_ntl[:], X_on_disk[idxs])
tscoll_repr
tscoll_repr (c, max_n=10)
String repr of up to `max_n` items of (possibly lazy) collection `c`
NumpyDatasets
NumpyDatasets (items:list=None, tfms:MutableSequence|Pipeline=None, tls:TfmdLists=None, n_inp:int=None, dl_type=None, use_list:bool=None, do_setup:bool=True, split_idx:int=None, train_setup:bool=True, splits:list=None, types=None, verbose:bool=False)
A dataset that creates tuples from X (and y) and applies `tfms` of type item_tfms
| | Type | Default | Details |
|---|---|---|---|
| items | NoneType | None | |
| tfms | collections.abc.MutableSequence \| fastcore.transform.Pipeline | | `Transform`(s) or `Pipeline` to apply |
| tls | NoneType | None | |
| n_inp | NoneType | None | |
| dl_type | TfmdDL | None | Type of `DataLoader` |
| use_list | bool | None | Use `list` in `L` |
| do_setup | bool | True | Call `setup()` for `Transform` |
| split_idx | int | None | Apply `Transform`(s) to training or validation set. `0` for training set and `1` for validation set |
| train_setup | bool | True | Apply `Transform`(s) only on training `DataLoader` |
| splits | list | None | Indices for training and validation sets |
| types | NoneType | None | Types of data in `items` |
| verbose | bool | False | Print verbose output |
TSDatasets
TSDatasets (items:list=None, tfms:MutableSequence|Pipeline=None, tls:TfmdLists=None, n_inp:int=None, dl_type=None, use_list:bool=None, do_setup:bool=True, split_idx:int=None, train_setup:bool=True, splits:list=None, types=None, verbose:bool=False)
A dataset that creates tuples from X (and optionally y) and applies item_tfms
| | Type | Default | Details |
|---|---|---|---|
| items | NoneType | None | |
| tfms | collections.abc.MutableSequence \| fastcore.transform.Pipeline | | `Transform`(s) or `Pipeline` to apply |
| tls | NoneType | None | |
| n_inp | NoneType | None | |
| dl_type | TfmdDL | None | Type of `DataLoader` |
| use_list | bool | None | Use `list` in `L` |
| do_setup | bool | True | Call `setup()` for `Transform` |
| split_idx | int | None | Apply `Transform`(s) to training or validation set. `0` for training set and `1` for validation set |
| train_setup | bool | True | Apply `Transform`(s) only on training `DataLoader` |
| splits | list | None | Indices for training and validation sets |
| types | NoneType | None | Types of data in `items` |
| verbose | bool | False | Print verbose output |
dsets = TSDatasets(X_on_disk, y_on_disk, splits=splits, tfms=[None, TSClassification()], inplace=True)
i = random_choice(len(splits[0]), 10, False).tolist()
test_eq(dsets.subset(i), dsets.train.subset(i))
dsets.valid.subset(i)
dsets.valid.subset(i)[[0,6,8]]
test_eq(dsets.subset(i)[[0,6,8]], dsets.train.subset(i)[[0,6,8]])
dsets.subset([0,7,3])
dsets.subset(i), dsets.train.subset(i), dsets.valid.subset(i)
((#10) [(TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(2)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3))] ...],
(#10) [(TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(2)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3))] ...],
(#10) [(TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(2)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(3))] ...])
tfms = [None, TSClassification()]
dsets = TSDatasets(X_on_disk, y_on_disk, splits=splits, tfms=tfms, inplace=False)
assert id(X_on_disk) == id(dsets.ptls[0].items) == id(dsets.train.ptls[0].items) == id(dsets.valid.ptls[0].items)

tfms = None
dsets = TSDatasets(X_on_disk, splits=splits, tfms=tfms, inplace=False)
assert id(X_on_disk) == id(dsets.ptls[0].items) == id(dsets.train.ptls[0].items) == id(dsets.valid.ptls[0].items)
TSDatasets.add_unlabeled
TSDatasets.add_unlabeled (X, inplace=True)
TSDatasets.add_test
TSDatasets.add_test (X, y=None, inplace=True)
TSDatasets.add_dataset
TSDatasets.add_dataset (X, y=None, inplace=True)
NumpyDatasets.add_unlabeled
NumpyDatasets.add_unlabeled (X, inplace=True)
NumpyDatasets.add_test
NumpyDatasets.add_test (X, y=None, inplace=True)
NumpyDatasets.add_dataset
NumpyDatasets.add_dataset (X, y=None, inplace=True)
add_ds
add_ds (dsets, X, y=None, inplace=True)
Create test datasets from X (and y) using validation transforms of dsets
dsets = TSDatasets(X_on_disk, y_on_disk, splits=splits, tfms=[None, TSClassification()], inplace=True)
print(dsets.train[0][0].shape, dsets.train[[0,1]][0].shape)
print(dsets.split_idx, dsets.train.split_idx, dsets.valid.split_idx)
print(dsets.new_empty())
dsets
torch.Size([1, 570]) torch.Size([2, 1, 570])
None 0 1
(#0) []
(#60) [(TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory(1))] ...]
dsets = TSDatasets(X_on_disk, y_on_disk, splits=splits, tfms=[None, TSClassification()], inplace=False)
print(dsets.train[0][0].shape, dsets.train[[0,1]][0].shape)
print(dsets.split_idx, dsets.train.split_idx, dsets.valid.split_idx)
print(dsets.new_empty())
dsets
torch.Size([1, 570]) torch.Size([2, 1, 570])
None 0 1
(#0) []
(#60) [(TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([0])), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([0])), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([0])), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([0])), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([0])), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([1])), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([1])), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([1])), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([1])), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), TensorCategory([1]))] ...]
dsets = TSDatasets(X_on_disk, y_on_disk, tfms=[None, TSClassification()], splits=splits, inplace=True)

idxs = random_choice(len(dsets), 10, False)
test_eq(dsets[idxs][0].numpy(), X_on_disk[idxs])
test_eq(dsets[idxs][1].numpy(), y_array[idxs])

idxs = random_choice(len(dsets.train), 10, False)
test_eq(dsets.train[idxs][0].numpy(), X_on_disk[splits[0][idxs]])
test_eq(dsets.train[idxs][1].numpy(), y_array[splits[0][idxs]])

idxs = random_choice(len(dsets.valid), 10, False)
test_eq(dsets.valid[idxs][0].numpy(), X_on_disk[splits[1][idxs]])
test_eq(dsets.valid[idxs][1].numpy(), y_array[splits[1][idxs]])
dsets = TSDatasets(X_on_disk, y_on_disk, tfms=[None, TSClassification()], splits=splits, inplace=False)
assert id(X_on_disk) == id(dsets.tls[0].items) == id(dsets.ptls[0].items)
assert id(X_on_disk) == id(dsets.train.tls[0].items) == id(dsets.train.ptls[0].items)
assert id(X_on_disk) == id(dsets.valid.tls[0].items) == id(dsets.valid.ptls[0].items)

idxs = random_choice(len(dsets), 10, False)
test_eq(dsets[idxs][0].numpy(), X_on_disk[idxs])
test_eq(dsets[idxs][1].numpy(), y_array[idxs])

idxs = random_choice(len(dsets.train), 10, False)
test_eq(dsets.train[idxs][0].numpy(), X_on_disk[splits[0][idxs]])
test_eq(dsets.train[idxs][1].numpy(), y_array[splits[0][idxs]])

idxs = random_choice(len(dsets.valid), 10, False)
test_eq(dsets.valid[idxs][0].numpy(), X_on_disk[splits[1][idxs]])
test_eq(dsets.valid[idxs][1].numpy(), y_array[splits[1][idxs]])
dsets = TSDatasets(X_on_disk, splits=splits, inplace=True)

idxs = random_choice(len(dsets), 10, False)
test_eq(dsets[idxs][0].numpy(), X_on_disk[idxs])

idxs = random_choice(len(dsets.train), 10, False)
test_eq(dsets.train[idxs][0].numpy(), X_on_disk[splits[0][idxs]])

idxs = random_choice(len(dsets.valid), 10, False)
test_eq(dsets.valid[idxs][0].numpy(), X_on_disk[splits[1][idxs]])
dsets = TSDatasets(X_on_disk, splits=splits, inplace=False)
assert np.shares_memory(X_on_disk, dsets.tls[0].items)
assert np.shares_memory(X_on_disk, dsets.ptls[0].items)
assert np.shares_memory(X_on_disk, dsets.train.tls[0].items)
assert np.shares_memory(X_on_disk, dsets.train.ptls[0].items)
assert np.shares_memory(X_on_disk, dsets.valid.tls[0].items)
assert np.shares_memory(X_on_disk, dsets.valid.ptls[0].items)

idxs = random_choice(len(dsets), 10, False)
test_eq(dsets[idxs][0].numpy(), X_on_disk[idxs])

idxs = random_choice(len(dsets.train), 10, False)
test_eq(dsets.train[idxs][0].numpy(), X_on_disk[splits[0][idxs]])

idxs = random_choice(len(dsets.valid), 10, False)
test_eq(dsets.valid[idxs][0].numpy(), X_on_disk[splits[1][idxs]])
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits, inplace=True)

idxs = random_choice(len(dsets), 10, False)
test_eq(dsets[idxs][0].numpy(), X_on_disk[idxs])
test_eq(dsets[idxs][1].numpy(), y_array[idxs])

idxs = random_choice(len(dsets.train), 10, False)
test_eq(dsets.train[idxs][0].numpy(), X_on_disk[splits[0][idxs]])
test_eq(dsets.train[idxs][1].numpy(), y_array[splits[0][idxs]])

idxs = random_choice(len(dsets.valid), 10, False)
test_eq(dsets.valid[idxs][0].numpy(), X_on_disk[splits[1][idxs]])
test_eq(dsets.valid[idxs][1].numpy(), y_array[splits[1][idxs]])
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits, inplace=False)
assert np.shares_memory(X_on_disk, dsets.tls[0].items)
assert np.shares_memory(X_on_disk, dsets.ptls[0].items)
assert np.shares_memory(X_on_disk, dsets.train.tls[0].items)
assert np.shares_memory(X_on_disk, dsets.train.ptls[0].items)
assert np.shares_memory(X_on_disk, dsets.valid.tls[0].items)
assert np.shares_memory(X_on_disk, dsets.valid.ptls[0].items)

idxs = random_choice(len(dsets), 10, False)
test_eq(dsets[idxs][0].numpy(), X_on_disk[idxs])
test_eq(dsets[idxs][1].numpy(), y_array[idxs])

idxs = random_choice(len(dsets.train), 10, False)
test_eq(dsets.train[idxs][0].numpy(), X_on_disk[splits[0][idxs]])
test_eq(dsets.train[idxs][1].numpy(), y_array[splits[0][idxs]])

idxs = random_choice(len(dsets.valid), 10, False)
test_eq(dsets.valid[idxs][0].numpy(), X_on_disk[splits[1][idxs]])
test_eq(dsets.valid[idxs][1].numpy(), y_array[splits[1][idxs]])
dsets = TSDatasets(X_on_disk, y_on_disk, tfms=[None, TSClassification()], splits=None, inplace=True)

idxs = random_choice(len(dsets), 10, False)
test_eq(dsets[idxs][0].numpy(), X_on_disk[idxs])
test_eq(dsets[idxs][1].numpy(), y_array[idxs])
dsets = TSDatasets(X_on_disk, y_on_disk, tfms=[None, TSClassification()], splits=None, inplace=False)
assert id(X_on_disk) == id(dsets.tls[0].items) == id(dsets.ptls[0].items)
assert id(X_on_disk) == id(dsets.train.tls[0].items) == id(dsets.train.ptls[0].items)

idxs = random_choice(len(dsets), 10, False)
test_eq(dsets[idxs][0].numpy(), X_on_disk[idxs])
test_eq(dsets[idxs][1].numpy(), y_array[idxs])
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits)
test_eq(dsets.train[0:10], dsets.add_dataset(X_on_disk[0:10], y_array[0:10])[:])
test_eq(dsets.train[0:10][0], dsets.add_dataset(X_on_disk[0:10])[:][0])
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits)
torch.save(dsets, 'export/dsets.pth')
del dsets
dsets = torch.load('export/dsets.pth')
dsets
(#60) [(TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1))] ...]
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits)
torch.save(dsets.train, 'export/dsets.pth')
del dsets
dsets = torch.load('export/dsets.pth')
dsets
(#30) [(TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(0)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1)), (TSTensor(vars:1, len:570, device=cpu, dtype=torch.float32), tensor(1))] ...]
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits)
test_eq(len(dsets.train), len(X_train))
dsets = TSDatasets(X_on_disk, y_array, tfms=None, splits=splits)
test_eq(len(dsets.train), len(X_train))
dsets = TSDatasets(X_on_disk, y_array, tfms=[add(1), TSCategorize()], splits=splits)
test_eq(len(dsets.train), len(X_train))
# test_eq(dsets.train[0][0].data, tensor(X_train[0] + 1))
test_eq(dsets.train[0][1].item(), y_tensor[0])
dsets = TSDatasets(X_on_disk, y_on_disk, tfms=[None, TSCategorize()], splits=splits)
test_eq(len(dsets.add_test(X_train, y_train)), len(X_train))
test_eq(len(dsets.add_unlabeled(X_train)), len(X_train))
X_tensor = torch.randn(100, 4, 50)
y_tensor = torch.randint(0, 2, size=(len(X_tensor),))
tensor_splits = (np.arange(80), np.arange(80, 100))
dsets = TSDatasets(X_tensor, y_tensor, tfms=[None, TSClassification()], splits=tensor_splits)
test_eq(type(dsets[0][0]), TSTensor)
TSDataLoader
TSDataLoader (dataset, bs=64, shuffle=False, drop_last=False, num_workers=0, verbose=False, do_setup=True, vocab=None, sort=False, weights=None, partial_n=None, sampler=None, pin_memory=False, timeout=0, batch_size=None, indexed=None, n=None, device=None, persistent_workers=False, pin_memory_device='', wif=None, before_iter=None, after_item=None, before_batch=None, after_batch=None, after_iter=None, create_batches=None, create_item=None, create_batch=None, retain=None, get_idxs=None, sample=None, shuffle_fn=None, do_batch=None)
Transformed DataLoader
| | Type | Default | Details |
|---|---|---|---|
| dataset | | | Map- or iterable-style dataset from which to load the data |
| bs | int | 64 | Size of batch |
| shuffle | bool | False | Whether to shuffle data |
| drop_last | bool | False | |
| num_workers | int | None | Number of CPU cores to use in parallel (default: All available up to 16) |
| verbose | bool | False | Whether to print verbose logs |
| do_setup | bool | True | Whether to run `setup()` for batch transform(s) |
| vocab | NoneType | None | |
| sort | bool | False | |
| weights | NoneType | None | |
| partial_n | NoneType | None | |
| sampler | NoneType | None | |
| pin_memory | bool | False | |
| timeout | int | 0 | |
| batch_size | NoneType | None | |
| indexed | NoneType | None | |
| n | NoneType | None | |
| device | NoneType | None | |
| persistent_workers | bool | False | |
| pin_memory_device | str | | |
| wif | NoneType | None | |
| before_iter | NoneType | None | |
| after_item | NoneType | None | |
| before_batch | NoneType | None | |
| after_batch | NoneType | None | |
| after_iter | NoneType | None | |
| create_batches | NoneType | None | |
| create_item | NoneType | None | |
| create_batch | NoneType | None | |
| retain | NoneType | None | |
| get_idxs | NoneType | None | |
| sample | NoneType | None | |
| shuffle_fn | NoneType | None | |
| do_batch | NoneType | None | |
NumpyDataLoader
NumpyDataLoader (dataset, bs=64, shuffle=False, drop_last=False, num_workers=0, verbose=False, do_setup=True, vocab=None, sort=False, weights=None, partial_n=None, sampler=None, pin_memory=False, timeout=0, batch_size=None, indexed=None, n=None, device=None, persistent_workers=False, pin_memory_device='', wif=None, before_iter=None, after_item=None, before_batch=None, after_batch=None, after_iter=None, create_batches=None, create_item=None, create_batch=None, retain=None, get_idxs=None, sample=None, shuffle_fn=None, do_batch=None)
Transformed DataLoader
| | Type | Default | Details |
|---|---|---|---|
| dataset | | | Map- or iterable-style dataset from which to load the data |
| bs | int | 64 | Size of batch |
| shuffle | bool | False | Whether to shuffle data |
| drop_last | bool | False | |
| num_workers | int | None | Number of CPU cores to use in parallel (default: All available up to 16) |
| verbose | bool | False | Whether to print verbose logs |
| do_setup | bool | True | Whether to run `setup()` for batch transform(s) |
| vocab | NoneType | None | |
| sort | bool | False | |
| weights | NoneType | None | |
| partial_n | NoneType | None | |
| sampler | NoneType | None | |
| pin_memory | bool | False | |
| timeout | int | 0 | |
| batch_size | NoneType | None | |
| indexed | NoneType | None | |
| n | NoneType | None | |
| device | NoneType | None | |
| persistent_workers | bool | False | |
| pin_memory_device | str | | |
| wif | NoneType | None | |
| before_iter | NoneType | None | |
| after_item | NoneType | None | |
| before_batch | NoneType | None | |
| after_batch | NoneType | None | |
| after_iter | NoneType | None | |
| create_batches | NoneType | None | |
| create_item | NoneType | None | |
| create_batch | NoneType | None | |
| retain | NoneType | None | |
| get_idxs | NoneType | None | |
| sample | NoneType | None | |
| shuffle_fn | NoneType | None | |
| do_batch | NoneType | None | |
TSDataLoaders
TSDataLoaders (*loaders, path='.', device=None)
Basic wrapper around several `DataLoader`s.
NumpyDataLoaders
NumpyDataLoaders (*loaders, path='.', device=None)
Basic wrapper around several `DataLoader`s.
StratifiedSampler
StratifiedSampler (y, bs:int=64, shuffle:bool=False, drop_last:bool=False)
Sampler where batches preserve the percentage of samples for each class
| | Type | Default | Details |
|---|---|---|---|
| y | | | The target variable for supervised learning problems. Stratification is done based on the y labels. |
| bs | int | 64 | Batch size |
| shuffle | bool | False | Flag to shuffle each class’s samples before splitting into batches. |
| drop_last | bool | False | Flag to drop the last incomplete batch. |
a = np.concatenate([np.zeros(90), np.ones(10)])
sampler = StratifiedSampler(a, bs=32, shuffle=True, drop_last=True)
idxs = np.array(list(iter(sampler)))
print(idxs[:32])
print(a[idxs][:32])
test_eq(a[idxs][:32].mean(), .1)
[[ 0 2 8 17 18 21 27 29 34 38 39 43 45 48 52 54 55 60 61 63 66 67 68 69
71 73 78 80 81 84 90 92 95 99 1 6 11 12 15 16 20 23 24 28 30 33 36 37
40 41 42 44 49 59 62 64 65 74 75 76 77 79 86 87 91 93 96 3 4 5 7 9
10 13 14 19 22 25 26 31 32 35 46 47 50 51 53 56 57 58 70 72 82 83 85 88
89 94 97 98]]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 1. 1. 1.]]
get_c
get_c (dls)
get_best_dls_params
get_best_dls_params (dls, n_iters=10, num_workers=[0, 1, 2, 4, 8], pin_memory=[True, False], prefetch_factor=[2, 4, 8], return_best=True, verbose=True)
get_best_dl_params
get_best_dl_params (dl, n_iters=10, num_workers=[0, 1, 2, 4, 8], pin_memory=[True, False], prefetch_factor=[2, 4, 8], return_best=True, verbose=True)
get_ts_dls
get_ts_dls (X, y=None, splits=None, sel_vars=None, sel_steps=None, tfms=None, inplace=True, path='.', bs=64, batch_tfms=None, num_workers=0, device=None, shuffle_train=True, drop_last=True, weights=None, partial_n=None, sampler=None, sort=False, **kwargs)
# Tests
a = np.arange(10)

for s in [None, np.arange(10), np.arange(10).tolist(), L(np.arange(10).tolist()), (np.arange(10).tolist(), None), (np.arange(10).tolist(), L())]:
    test_eq(_check_splits(a, s), (L(np.arange(10).tolist()), L()))
get_subset_dl
get_subset_dl (dl, idxs)
get_ts_dl
get_ts_dl (X, y=None, split=None, sel_vars=None, sel_steps=None, tfms=None, inplace=True, path='.', bs=64, batch_tfms=None, num_workers=0, device=None, shuffle_train=True, drop_last=True, weights=None, partial_n=None, sampler=None, sort=False, **kwargs)
X, y, splits = get_UCR_data(dsid, on_disk=False, split_data=False)
dls = get_ts_dls(X, y, tfms=[None, TSClassification()], splits=splits, bs=8)
dls = get_best_dls_params(dls, prefetch_factor=[2, 4, 8, 16])
Dataloader 0
num_workers: 0 pin_memory: True prefetch_factor: 2 - time: 1.400 ms/iter
num_workers: 0 pin_memory: False prefetch_factor: 2 - time: 0.620 ms/iter
best dl params:
best num_workers : 0
best pin_memory : False
best prefetch_factor: 2
return_best : True
Dataloader 1
num_workers: 0 pin_memory: True prefetch_factor: 2 - time: 0.261 ms/iter
num_workers: 0 pin_memory: False prefetch_factor: 2 - time: 0.306 ms/iter
best dl params:
best num_workers : 0
best pin_memory : True
best prefetch_factor: 2
return_best : True
y_int = np.random.randint(0, 4, size=len(X))
dls = get_ts_dls(X, y_int, splits=splits, bs=8)
test_eq(hasattr(dls, "vocab"), False)

dls = get_ts_dls(X, y_int, splits=splits, bs=8, vocab=[0,1,2,3])
test_eq(dls.vocab, [0,1,2,3])
test_eq(dls.c, 4)
test_eq(dls.cat, True)
= get_UCR_data(dsid, on_disk=False, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSClassification()], splits=splits, bs=8)
dls =first(dls.train)
b
dls.decode(b)1], dls.vars)
test_eq(X.shape[-1], dls.len) test_eq(X.shape[
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSClassification()], splits=splits, bs=64, inplace=True)
dls
= random_choice(len(dls.valid_ds), 10, False)
idxs = get_subset_dl(dls.train, idxs)
new_dl
= random_choice(len(dls.valid_ds), 10, False)
idxs = get_subset_dl(dls.valid, idxs)
new_dl 0].cpu().numpy(), X[splits[1][idxs]]) test_eq(new_dl.one_batch()[
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = np.random.rand(len(X))
weights = get_ts_dls(X, y, tfms=[None, TSClassification()], splits=splits, bs=64, inplace=True, weights=weights)
dls = weights[splits[0]] / weights[splits[0]].sum()
weights2
test_eq(dls.train.weights, weights2)None) test_eq(dls.valid.weights,
= 12
partial_n = get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = get_ts_dls(X, y, splits=splits, tfms=[None, TSClassification()], bs=64, inplace=True, partial_n=partial_n)
dls len(dls.train.one_batch()[0]), partial_n)
test_eq(
= .1
partial_n = get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSClassification()], bs=64, inplace=True, partial_n=partial_n)
dls len(dls.train.one_batch()[0]), int(round(len(dls.train.dataset) * partial_n))) test_eq(
You’ll now be able to pass a sampler to a tsai
dataloader.
You should use a sampler for the train set and a sampler for the validation set. You’ll need to pass an object with the same length as each dataset. For example, the splits like in the case below.
⚠️ Remember to set shuffle=False when using a sampler, since they are mutually exclusive. This means that when you use a sampler, you always need to set shuffle in the dataloader to False. The sampler will control whether the indices are shuffled or not (you can set shuffle to True or False in the sampler).
drop_last is managed in the dataloader though.
X, y, splits = get_UCR_data('OliveOil', on_disk=False, split_data=False)
train_sampler = torch.utils.data.sampler.RandomSampler(splits[0])
valid_sampler = torch.utils.data.sampler.SequentialSampler(splits[1])
dls = get_ts_dls(X, y, splits=splits, tfms=[None, TSClassification()], bs=8, inplace=True,
                 shuffle=False, drop_last=True, sampler=[train_sampler, valid_sampler])
print('train')
for _ in dls.train:
    print(dls.train.idxs)
print('valid')
for _ in dls.valid:
    print(dls.valid.idxs)
train
[22, 25, 16, 3, 26, 28, 7, 18]
[5, 4, 12, 27, 29, 24, 9, 11]
[0, 2, 8, 17, 21, 20, 23, 10]
valid
[0, 1, 2, 3, 4, 5, 6, 7]
[8, 9, 10, 11, 12, 13, 14, 15]
[16, 17, 18, 19, 20, 21, 22, 23]
[24, 25, 26, 27, 28, 29]
X, y, splits = get_UCR_data('OliveOil', on_disk=False, split_data=False)
train_sampler = torch.utils.data.sampler.SequentialSampler(splits[0])
valid_sampler = torch.utils.data.sampler.SequentialSampler(splits[1])
dls = get_ts_dls(X, y, splits=splits, tfms=[None, TSClassification()], bs=64, inplace=True,
                 shuffle=False, sampler=[train_sampler, valid_sampler])
test_eq(dls.get_idxs(), np.arange(len(splits[0])))
test_eq(dls.train.get_idxs(), np.arange(len(splits[0])))
test_eq(dls.valid.get_idxs(), np.arange(len(splits[1])))
xb = dls.valid.one_batch()[0].cpu().numpy()
test_close(xb, X[dls.valid.split_idxs])
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = torch.utils.data.sampler.RandomSampler(splits[0])
train_sampler = torch.utils.data.sampler.SequentialSampler(splits[0])
valid_sampler = get_ts_dls(X, y, splits=splits, tfms=[None, TSClassification()], bs=32, inplace=True,
dls =False, drop_last=True, sampler=[train_sampler, valid_sampler])
shufflelen(splits[0])))
test_ne(dls.train.get_idxs(), np.arange(len(splits[0])))
test_eq(np.sort(dls.train.get_idxs()), np.arange(len(splits[1]))) test_eq(dls.valid.get_idxs(), np.arange(
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSClassification()], splits=splits, bs=64, inplace=False)
dls
= random_choice(len(dls.valid_ds), 10, False)
idxs = get_subset_dl(dls.train, idxs)
new_dl
= random_choice(len(dls.valid_ds), 10, False)
idxs = get_subset_dl(dls.valid, idxs)
new_dl 0].cpu().numpy(), X[splits[1][idxs]]) test_eq(new_dl.one_batch()[
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSClassification()], splits=splits, bs=8)
dls = dls.one_batch()
b = dls.input_idxs
input_idxs 0].cpu().numpy(), X[input_idxs])
test_eq(b[= dls.train.one_batch()
b = dls.train.input_idxs
input_idxs 0].cpu().numpy(), X[input_idxs])
test_eq(b[assert max(input_idxs) < len(splits[0])
= dls.valid.one_batch()
b = dls.valid.input_idxs
input_idxs 0].cpu().numpy(), X[input_idxs])
test_eq(b[assert min(input_idxs) >= len(splits[0])
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSCategorize()], splits=splits, bs=8)
dls =first(dls.train)
b
dls.decode(b)1], dls.vars)
test_eq(X.shape[-1], dls.len) test_eq(X.shape[
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSCategorize()], splits=splits, bs=8, weights=np.random.randint(0, 3, len(y)))
dls =first(dls.train)
b
dls.decode(b)1], dls.vars)
test_eq(X.shape[-1], dls.len) test_eq(X.shape[
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = TSDatasets(X, y, tfms=[None, TSCategorize()], splits=splits)
dsets = TSDataLoaders.from_dsets(dsets.train, dsets.valid, device=default_device(), bs=4)
ts_dls 'export/ts_dls.pth')
torch.save(ts_dls, del ts_dls
= torch.load('export/ts_dls.pth')
ts_dls for xb,yb in ts_dls.train:
test_eq(tensor(X[ts_dls.train.idxs]), xb.cpu())
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSCategorize()], splits=splits, bs=4)
dls for xb,yb in dls.train:
test_eq(xb.cpu().numpy(), X[dls.train.input_idxs])for xb,yb in dls.valid:
test_eq(xb.cpu().numpy(), X[dls.valid.input_idxs])
True, False, True, False)) test_eq((ts_dls.train.shuffle, ts_dls.valid.shuffle, ts_dls.train.drop_last, ts_dls.valid.drop_last), (
= 'OliveOil'
dsid = get_UCR_data(dsid, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSCategorize()], splits=splits, bs=8, num_workers=0)
dls = first(dls.train)
xb, yb test_eq(tensor(X[dls.train.idxs]), xb.cpu())
True, False, True, False)) test_eq((dls.train.shuffle, dls.valid.shuffle, dls.train.drop_last, dls.valid.drop_last), (
# multiclass
= 'OliveOil'
dsid = get_UCR_data(dsid, on_disk=True, split_data=False)
X, y, splits = get_ts_dls(X, y, tfms=[None, TSCategorize()], splits=splits, inplace=True)
dls
dls.show_dist()
dls.train.show_dist()= first(dls.train)
xb,yb True, 4))
test_eq((dls.cat, dls.c), (None)
test_ne(dls.cws.cpu().numpy(),
dls.decoder((xb, ))0], ))
dls.decoder((xb[
dls.decoder((xb, yb))0], yb[0]))
dls.decoder((xb[
dls.decoder(yb)0]) dls.decoder(yb[
'1'
= dls.new_dl(X)
new_dl first(new_dl)
(TSTensor(samples:60, vars:1, len:570, device=cpu, dtype=torch.float32),)
= dls.new_dl(X, y=y)
new_dl first(new_dl)
(TSTensor(samples:60, vars:1, len:570, device=cpu, dtype=torch.float32),
TensorCategory([2, 3, 2, 2, 0, 1, 1, 3, 3, 1, 2, 0, 0, 3, 0, 1, 0, 3, 3, 3, 1,
3, 3, 3, 3, 3, 0, 3, 1, 1, 3, 3, 2, 3, 3, 3, 1, 1, 3, 2, 3, 0,
3, 0, 3, 1, 1, 2, 1, 1, 1, 3, 3, 1, 2, 1, 1, 3, 0, 0]))
dls.train.dataset.split_idxs, dls.train.dataset.splits, dls.valid.split_idxs
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], dtype=int8),
(#30) [0,1,2,3,4,5,6,7,8,9...],
array([30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59], dtype=int8))
# 2d input array and tfms == None return a NoTfmLists object
= get_UCR_data('OliveOil', on_disk=False, split_data=False)
X, y, splits = X[:, 0]
X =[None, TSCategorize()]
tfms= get_ts_dls(X, y, splits=splits, tfms=tfms, bs=8)
dls 1, dls.vars)
test_eq(-1], dls.len)
test_eq(X.shape[type(dls.tls[0]).__name__, 'NoTfmLists')
test_eq(= get_ts_dls(X, y, splits=splits, tfms=tfms, bs=8, inplace=False)
dls 1, dls.vars)
test_eq(-1], dls.len)
test_eq(X.shape[type(dls.tls[0]).__name__, 'NoTfmLists') test_eq(
# regression
= 'OliveOil'
dsid = get_UCR_data(dsid, on_disk=True, split_data=False)
X, y, splits = get_ts_dls(X, np.random.rand(60, ), tfms=[None, ToNumpyTensor], splits=splits)
dls
dls.show_dist()
dls.train.show_dist()= first(dls.train)
xb,yb
dls.decoder((xb, ))0], ))
dls.decoder((xb[
dls.decoder((xb, yb))0], yb[0]))
dls.decoder((xb[
dls.decoder(yb)0])
dls.decoder(yb[False, 1))
test_eq((dls.cat, dls.c), (None) test_eq(dls.cws,
# regression, multilabel
= 'OliveOil'
dsid = get_UCR_data(dsid, on_disk=True, split_data=False)
X, y, splits = get_ts_dls(X, np.random.rand(60, 3) * 5, tfms=[None, ToNumpyTensor], splits=splits)
dls
dls.show_dist()
dls.train.show_dist()= first(dls.train)
xb,yb
dls.decoder((xb, ))0], ))
dls.decoder((xb[
dls.decoder((xb, yb))0], yb[0]))
dls.decoder((xb[
dls.decoder(yb)0])
dls.decoder(yb[False, 1, 3))
test_eq((dls.cat, dls.c, dls.d),(None) test_eq(dls.cws,
# multiclass, multilabel
= 'OliveOil'
dsid = get_UCR_data(dsid, on_disk=True, split_data=False)
X, y, splits = {
cm '1':'A',
'2':['B', 'C'],
'3':['B', 'D'] ,
'4':'E',
}= cm.keys()
keys = {k:v for k,v in zip(keys, [listify(v) for v in cm.values()])}
new_cm = np.array([new_cm[yi] if yi in keys else listify(yi) for yi in y], dtype=object)
y_multi = get_ts_dls(X, y_multi, tfms=[None, TSMultiLabelClassification()], splits=splits)
dls
dls.show_dist()
dls.train.show_dist()= first(dls.train)
xb,yb
dls.decoder((xb, ))0], ))
dls.decoder((xb[
dls.decoder((xb, yb))0], yb[0]))
dls.decoder((xb[
dls.decoder(yb)0])
dls.decoder(yb[True, 5))
test_eq((dls.cat, dls.c), (None) test_ne(dls.cws.cpu().numpy(),
= 'OliveOil'
dsid = get_UCR_data(dsid, on_disk=True, split_data=False)
X, y, splits = {
cm '1':'A',
'2':['B', 'C'],
'3':['B', 'D'] ,
'4':'E',
}= cm.keys()
keys = {k:v for k,v in zip(keys, [listify(v) for v in cm.values()])}
new_cm = np.array([new_cm[yi] if yi in keys else listify(yi) for yi in y], dtype=object)
y_multi = get_ts_dls(X, y_multi, tfms=[None, TSMultiLabelClassification()], splits=splits)
dls 0]).one_batch().shape, (1, 570))
test_eq(dls.new(X[15]).one_batch().shape, (15, 1, 570))
test_eq(dls.new(X[:0]).one_batch().shape, (1, 570))
test_eq(dls.train.new(X[15]).one_batch().shape, (15, 1, 570)) test_eq(dls.valid.new(X[:
= 25
bs = TSDatasets(X, y, tfms=[None, TSCategorize()], splits=splits)
dsets = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2], batch_tfms=add(1), num_workers=0)
dls = dls.train.one_batch()
xb,yb 0]][dls.train.idxs]) + 1) test_eq(xb.cpu().data, tensor(X_on_disk[splits[
= TSDatasets(X, y, tfms=[None, TSCategorize()], splits=splits)
dsets = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2])
dls = dls.train.one_batch()
xb,yb min(bs, len(splits[0])), X.shape[1], X.shape[-1]))
test_eq(xb.shape, (= iter(dls.valid)
it for xb,yb in it:
1]][dls.valid.idxs])) test_close(xb.cpu(), TSTensor(X[splits[
= 64
bs = TSDatasets(X, y, tfms=[add(1), TSCategorize()], splits=RandomSplitter(valid_pct=.3)(y_array))
dsets = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2])
dls = dls.train.one_batch()
xb,yb min(bs, len(dsets.train)), X_on_disk.shape[1], X_on_disk.shape[-1]))
test_eq(xb.shape, (= dls.valid.one_batch()
xb,yb min(bs*2, len(dsets.valid)), X_on_disk.shape[1], X_on_disk.shape[-1])) test_eq(xb.shape, (
= TSDatasets(X_on_disk, y_array, tfms=[None, TSCategorize()], splits=splits)
dsets = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[32, 64])
dls for i in range(10):
= dls.train if random.random() < .5 else dls.valid
dl = dl.one_batch()
xb,yb
torch.equal(xb.cpu(), TSTensor(X_on_disk[dl.input_idxs]))
= TSDatasets(X_on_disk, y_array, tfms=[None, TSCategorize()])
dsets = TSDataLoaders.from_dsets(dsets, bs=32)
dls for i in range(10):
= dls.one_batch()
xb,yb
torch.equal(xb.cpu(), TSTensor(X_on_disk[dl.input_idxs]))
= TSDatasets(X_on_disk, tfms=None)
dsets = TSDataLoaders.from_dsets(dsets, bs=32)
dls for i in range(10):
= dls.one_batch()
xb 0].cpu(), TSTensor(X_on_disk[dl.input_idxs])) torch.equal(xb[
= TSDatasets(X_on_disk, y_array, tfms=[None, TSCategorize()])
dsets = TSDataLoaders.from_dsets(dsets, bs=32)
dls len(X_on_disk)).tolist())) test_eq(dls.split_idxs, L(np.arange(
= get_UCR_data('NATOPS', return_split=False)
X, y, splits = [None, [TSCategorize()]]
tfms = get_ts_dls(X, y, tfms=tfms, splits=splits, bs=[64, 128])
dls
dls.show_batch() dls.show_dist()
# test passing a list with categories instead of a numpy array
= 'NATOPS'
dsid = 64
bs = get_UCR_data(dsid, return_split=False)
X2, y2, splits2 = sorted(set(y))
vocab = [None, [TSCategorize(vocab=vocab)]]
tfms = TSDatasets(X2, y2, tfms=tfms, splits=splits2)
dsets = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2])
dls dls.train.one_batch()
(TSTensor(samples:64, vars:24, len:51, device=cpu, dtype=torch.float32),
TensorCategory([0, 3, 0, 5, 0, 0, 5, 3, 3, 1, 2, 0, 0, 2, 5, 2, 2, 4, 5, 3, 2,
4, 2, 1, 1, 0, 1, 2, 0, 4, 4, 4, 4, 2, 0, 0, 3, 3, 0, 5, 4, 3,
2, 5, 5, 2, 2, 4, 3, 0, 2, 4, 4, 5, 5, 0, 5, 3, 2, 1, 0, 3, 4,
2]))
# MultiCategory
= 64
bs = 100
n_epochs = [None, [MultiCategorize()]]
tfms = TSDatasets(X2, y2, tfms=tfms, splits=splits2)
dsets = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=bs)
dls dls.train.one_batch()
(TSTensor(samples:64, vars:24, len:51, device=cpu, dtype=torch.float32),
TensorMultiCategory([[7, 0, 1],
[4, 0, 1],
[7, 0, 1],
[5, 0, 1],
[2, 0, 1],
[2, 0, 1],
[2, 0, 1],
[7, 0, 1],
[5, 0, 1],
[3, 0, 1],
[6, 0, 1],
[7, 0, 1],
[3, 0, 1],
[6, 0, 1],
[7, 0, 1],
[7, 0, 1],
[6, 0, 1],
[7, 0, 1],
[5, 0, 1],
[3, 0, 1],
[3, 0, 1],
[7, 0, 1],
[7, 0, 1],
[2, 0, 1],
[4, 0, 1],
[4, 0, 1],
[2, 0, 1],
[4, 0, 1],
[6, 0, 1],
[2, 0, 1],
[2, 0, 1],
[5, 0, 1],
[2, 0, 1],
[5, 0, 1],
[4, 0, 1],
[7, 0, 1],
[2, 0, 1],
[3, 0, 1],
[4, 0, 1],
[6, 0, 1],
[2, 0, 1],
[7, 0, 1],
[2, 0, 1],
[3, 0, 1],
[4, 0, 1],
[5, 0, 1],
[5, 0, 1],
[2, 0, 1],
[5, 0, 1],
[2, 0, 1],
[3, 0, 1],
[5, 0, 1],
[6, 0, 1],
[7, 0, 1],
[5, 0, 1],
[2, 0, 1],
[7, 0, 1],
[4, 0, 1],
[5, 0, 1],
[6, 0, 1],
[7, 0, 1],
[4, 0, 1],
[7, 0, 1],
[3, 0, 1]]))
The combination of splits, sel_vars and sel_steps is very powerful, as it allows you to perform advanced indexing of the array-like X.
from tsai.data.validation import TSSplitter
X = np.arange(16*5*50).reshape(16,5,50)
y = alphabet[np.random.randint(0,3, 16)]
splits = TSSplitter(show_plot=False)(y)
tfms = [None, TSCategorize()]
batch_tfms = None
dls = get_ts_dls(X, y, splits=splits, sel_vars=[0, 1, 3], sel_steps=slice(-10, None), tfms=tfms, batch_tfms=batch_tfms)
xb,yb=dls.train.one_batch()
test_close(X[dls.input_idxs][:, [0, 1, 3]][...,slice(-10, None)], xb.cpu().numpy())
new_dl = dls.train.new_dl(X[:5], y[:5])
print(new_dl.one_batch())
new_empty_dl = dls.new_empty() # when exported
dl = new_empty_dl.new_dl(X[:10], y[:10], bs=64) # after export
dl.one_batch()
(TSTensor(samples:5, vars:3, len:10, device=cpu, dtype=torch.int64), TensorCategory([2, 2, 2, 2, 2]))
(TSTensor(samples:10, vars:3, len:10, device=cpu, dtype=torch.int64),
TensorCategory([2, 2, 2, 0, 2, 2, 0, 2, 1, 1]))
get_dl_percent_per_epoch
get_dl_percent_per_epoch (dl, model, n_batches=None)
get_time_per_batch
get_time_per_batch (dl, model=None, n_batches=None)
X, y, splits = get_UCR_data('NATOPS', split_data=False)
tfms = [None, [TSCategorize()]]
dls = get_ts_dls(X, y, tfms=tfms, splits=splits)
train_dl = dls.train
xb, _ = train_dl.one_batch()
model = nn.Linear(xb.shape[-1], 2).to(xb.device)
t = get_dl_percent_per_epoch(train_dl, model, n_batches=10)
print(t)
93.70%