from fastai.tabular.core import *
from tsai.data.tabular import *
TabModel
This is an implementation created by Ignacio Oguiza (oguiza@timeseriesAI.co) based on fastai’s TabularModel.
We built it so that it’s easy to change the head of the model, something that is particularly interesting when building hybrid models.
TabHead
def TabHead(
emb_szs, n_cont, c_out, layers:NoneType=None, fc_dropout:NoneType=None, y_range:NoneType=None, use_bn:bool=True,
bn_final:bool=False, lin_first:bool=False, act:ReLU=ReLU(inplace=True), skip:bool=False
):
Basic head for tabular data.
TabBackbone
def TabBackbone(
emb_szs, n_cont, embed_p:float=0.0, bn_cont:bool=True
):
Same as nn.Module, but no need for subclasses to call super().__init__
TabModel
def TabModel(
emb_szs, n_cont, c_out, layers:NoneType=None, fc_dropout:NoneType=None, embed_p:float=0.0, y_range:NoneType=None,
use_bn:bool=True, bn_final:bool=False, bn_cont:bool=True, lin_first:bool=False, act:ReLU=ReLU(inplace=True),
skip:bool=False
):
Basic model for tabular data.
# Example setup: build tabular dataloaders from the ADULT_SAMPLE dataset.
# Resolve the dataset folder via untar_data and load adult.csv into a DataFrame.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')
# df['salary'] = np.random.rand(len(df)) # uncomment to simulate a cont dependent variable
# Preprocessing steps handed to TabularPandas below.
procs = [Categorify, FillMissing, Normalize]
# Column roles: categorical inputs, continuous inputs, and the dependent variable.
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
y_names = ['salary']
# Pick the target block from the type of the first salary value:
# a float selects RegressionBlock, anything else selects CategoryBlock.
y_block = RegressionBlock() if isinstance(df['salary'].values[0], float) else CategoryBlock()
# Split the row indices with RandomSplitter (default arguments).
splits = RandomSplitter()(range_of(df))
# Turn off pandas' chained-assignment check before building TabularPandas.
# NOTE(review): presumably required because inplace=True is used below - confirm against fastai docs.
pd.options.mode.chained_assignment=None
to = TabularPandas(df, procs=procs, cat_names=cat_names, cont_names=cont_names, y_names=y_names, y_block=y_block, splits=splits, inplace=True,
reduce_memory=False)
# Display 5 rows of the processed data.
to.show(5)
# Dataloaders with batch size 16 for training and 32 for validation,
# then grab the first training batch for the shape check that follows.
tab_dls = to.dataloaders(bs=16, val_bs=32)
b = first(tab_dls.train)
test_eq((b[0].shape, b[1].shape, b[2].shape), (torch.Size([16, 7]), torch.Size([16, 3]), torch.Size([16, 1])))
| | workclass | education | marital-status | occupation | relationship | race | education-num_na | age | fnlwgt | education-num | salary |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 20505 | Private | HS-grad | Married-civ-spouse | Sales | Husband | White | False | 47.0 | 197836.0 | 9.0 | <50k |
| 28679 | Private | HS-grad | Married-civ-spouse | Craft-repair | Husband | White | False | 28.0 | 65078.0 | 9.0 | >=50k |
| 11669 | Private | HS-grad | Never-married | Adm-clerical | Not-in-family | White | False | 38.0 | 202683.0 | 9.0 | <50k |
| 29079 | Self-emp-not-inc | Bachelors | Married-civ-spouse | Prof-specialty | Husband | White | False | 41.0 | 168098.0 | 13.0 | <50k |
| 7061 | Private | HS-grad | Married-civ-spouse | Adm-clerical | Husband | White | False | 31.0 | 243442.0 | 9.0 | <50k |
# Example check: build a TabModel from the dataloaders and verify its output
# shape and its behaviour under freeze()/unfreeze().
tab_model = build_tabular_model(TabModel, dls=tab_dls)
b = first(tab_dls.train)
# Feed every batch element except the last one to the model (moved to the
# batch's device) and check the output shape is (tab_dls.bs, tab_dls.c).
test_eq(tab_model.to(b[0].device)(*b[:-1]).shape, (tab_dls.bs, tab_dls.c))
# Wrap the model in a Learner, using ts_splitter to define the parameter groups.
learn = Learner(tab_dls, tab_model, splitter=ts_splitter)
# Record count_parameters at three points: initially, after freeze(), and
# after unfreeze().
# NOTE(review): presumably count_parameters counts only trainable parameters - confirm.
p1 = count_parameters(learn.model)
learn.freeze()
p2 = count_parameters(learn.model)
learn.unfreeze()
p3 = count_parameters(learn.model)
# Unfreezing must restore the initial count; the frozen count must be
# smaller than the initial one but still non-zero.
assert p1 == p3
assert p1 > p2 > 0