Time Series Tabular Data

Main tabular functions used throughout the library. They are helpful when your time series come with additional tabular data, such as metadata or time series features.


source

get_tabular_ds


def get_tabular_ds(
    df,
    procs:list=[<class 'fastai.tabular.core.Categorify'>, <class 'fastai.tabular.core.FillMissing'>, <class 'fastai.data.transforms.Normalize'>],
    cat_names:NoneType=None, cont_names:NoneType=None, y_names:NoneType=None, groupby:NoneType=None,
    y_block:NoneType=None, splits:NoneType=None, do_setup:bool=True, inplace:bool=False, reduce_memory:bool=True,
    device:NoneType=None
):

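Builds a tabular dataset from `df`. The transforms in `procs` (by default `Categorify`, `FillMissing` and `Normalize`) are applied to the categorical (`cat_names`) and continuous (`cont_names`) columns, `y_names` selects the target column(s), and `splits` optionally provides the train/validation indices.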


source

get_tabular_dls


def get_tabular_dls(
    df,
    procs:list=[<class 'fastai.tabular.core.Categorify'>, <class 'fastai.tabular.core.FillMissing'>, <class 'fastai.data.transforms.Normalize'>],
    cat_names:NoneType=None, cont_names:NoneType=None, y_names:NoneType=None, bs:int=64, y_block:NoneType=None,
    splits:NoneType=None, do_setup:bool=True, inplace:bool=False, reduce_memory:bool=True, device:NoneType=None,
    path:str | Path='.', # Path to store export objects
):

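Builds tabular dataloaders from `df` with batch size `bs`, using the same `procs`, `cat_names`, `cont_names`, `y_names` and `splits` arguments as `get_tabular_ds`. The returned object supports `show_batch()` and can be passed directly to `tabular_learner`, as shown in the example below.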


source

preprocess_df


def preprocess_df(
    df,
    procs:list=[<class 'fastai.tabular.core.Categorify'>, <class 'fastai.tabular.core.FillMissing'>, <class 'fastai.data.transforms.Normalize'>],
    cat_names:NoneType=None, cont_names:NoneType=None, y_names:NoneType=None, sample_col:NoneType=None,
    reduce_memory:bool=True
):

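Applies `procs` to the `cat_names`, `cont_names` and `y_names` columns of `df` and returns a tuple with the processed dataframe and the fitted procs. The fitted procs expose the learned category vocabularies (`classes`) and normalization statistics (`means`, `stds`), as shown in the second example below.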

path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')
# df['salary'] = np.random.rand(len(df)) # uncomment to simulate a continuous dependent variable

cat_names = ['workclass', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
             'capital-gain', 'capital-loss', 'native-country']
cont_names = ['age', 'fnlwgt', 'hours-per-week']
target = ['salary']
splits = RandomSplitter()(range_of(df))

dls = get_tabular_dls(df, cat_names=cat_names, cont_names=cont_names, y_names='salary', splits=splits, bs=512, device=device)
dls.show_batch()
workclass education education-num marital-status occupation relationship race sex capital-gain capital-loss native-country age fnlwgt hours-per-week salary
0 Self-emp-inc Prof-school 15.0 Never-married Prof-specialty Not-in-family White Male 27828 0 United-States 58.000000 112944.998841 40.000000 >=50k
1 Private Assoc-acdm 12.0 Widowed Adm-clerical Not-in-family White Female 0 0 United-States 57.000001 182027.999765 35.000000 <50k
2 Private 1st-4th 2.0 Widowed Priv-house-serv Not-in-family Black Female 0 0 United-States 57.000001 190942.000041 30.000000 <50k
3 State-gov HS-grad 9.0 Married-civ-spouse Protective-serv Husband Black Male 0 0 United-States 52.000000 142756.999688 40.000000 >=50k
4 Private Some-college 10.0 Never-married Adm-clerical Not-in-family Black Female 0 0 United-States 58.000000 105059.998140 37.000000 <50k
5 Self-emp-not-inc Some-college 10.0 Married-civ-spouse Craft-repair Husband White Male 0 0 United-States 33.000000 334743.993726 50.000000 >=50k
6 Private Bachelors 13.0 Married-civ-spouse Prof-specialty Husband White Male 0 0 United-States 53.000000 31587.998222 52.000000 >=50k
7 Private Some-college 10.0 Never-married Farming-fishing Other-relative Black Male 0 0 United-States 30.000000 214062.999938 72.000001 <50k
8 Local-gov HS-grad 9.0 Married-civ-spouse Other-service Husband White Male 0 0 United-States 58.000000 223214.000209 40.000000 <50k
9 State-gov Some-college 10.0 Divorced Adm-clerical Own-child White Male 0 0 United-States 49.000000 206577.000007 40.000000 <50k
metrics = mae if dls.c == 1 else accuracy
learn = tabular_learner(dls, layers=[200, 100], y_range=None, metrics=metrics)
learn.fit(1, 1e-2)
epoch train_loss valid_loss accuracy time
0 0.359194 0.319021 0.843980 00:01
learn.dls.one_batch()
(tensor([[ 5, 10, 13,  ...,  1,  1, 40],
         [ 5,  9, 11,  ...,  1,  1, 40],
         [ 5, 12,  9,  ...,  1,  1, 40],
         ...,
         [ 5,  8, 12,  ..., 48,  1, 12],
         [ 5,  9, 11,  ..., 54,  1, 40],
         [ 5,  9, 11,  ...,  1,  1, 40]], device='mps:0'),
 tensor([[-0.9933, -0.0060, -1.9716],
         [-0.0411, -0.2058, -0.0346],
         [-1.4328,  0.6653, -1.6487],
         ...,
         [ 2.2294,  1.7088, -2.7786],
         [ 0.1053, -0.2056, -0.0346],
         [-0.1144,  0.0362,  0.1268]], device='mps:0'),
 tensor([[0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [1],
         [1],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [1],
         [1],
         [0],
         [0],
         [1],
         [0],
         [1],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [1],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [1],
         [1],
         [0],
         [1],
         [0],
         [0],
         [1],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [1],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [1],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [1],
         [0],
         [1],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [1],
         [1],
         [1],
         [1],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [1],
         [1],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [1],
         [0],
         [1],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1]], device='mps:0', dtype=torch.int8))
learn.model
TabularModel(
  (embeds): ModuleList(
    (0): Embedding(10, 6)
    (1-2): 2 x Embedding(17, 8)
    (3): Embedding(8, 5)
    (4): Embedding(16, 8)
    (5): Embedding(7, 5)
    (6): Embedding(6, 4)
    (7): Embedding(3, 3)
    (8): Embedding(119, 23)
    (9): Embedding(91, 20)
    (10): Embedding(43, 13)
  )
  (emb_drop): Dropout(p=0.0, inplace=False)
  (bn_cont): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): LinBnDrop(
      (0): Linear(in_features=106, out_features=200, bias=False)
      (1): ReLU(inplace=True)
      (2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): LinBnDrop(
      (0): Linear(in_features=200, out_features=100, bias=False)
      (1): ReLU(inplace=True)
      (2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): LinBnDrop(
      (0): Linear(in_features=100, out_features=2, bias=True)
    )
  )
)
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')
cat_names = ['workclass', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
             'capital-gain', 'capital-loss', 'native-country']
cont_names = ['age', 'fnlwgt', 'hours-per-week']
target = ['salary']
df, procs = preprocess_df(df, procs=[Categorify, FillMissing, Normalize], cat_names=cat_names, cont_names=cont_names, y_names=target,
                          sample_col=None, reduce_memory=True)
df.head()
workclass education education-num marital-status occupation relationship race sex capital-gain capital-loss native-country age fnlwgt hours-per-week salary
0 5 8 12 3 0 6 5 1 1 48 40 0.763796 -0.838084 -0.035429 1
1 5 13 14 1 5 2 5 2 101 1 40 0.397233 0.444987 0.369519 1
2 5 12 0 1 0 5 3 1 1 1 40 -0.042642 -0.886734 -0.683348 0
3 6 15 15 3 11 1 2 2 1 1 40 -0.042642 -0.728873 -0.035429 1
4 7 6 0 3 9 6 3 1 1 1 40 0.250608 -1.018314 0.774468 0
procs.classes, procs.means, procs.stds
({'workclass': ['#na#', ' ?', ' Federal-gov', ' Local-gov', ' Never-worked', ' Private', ' Self-emp-inc', ' Self-emp-not-inc', ' State-gov', ' Without-pay'],
  'education': ['#na#', ' 10th', ' 11th', ' 12th', ' 1st-4th', ' 5th-6th', ' 7th-8th', ' 9th', ' Assoc-acdm', ' Assoc-voc', ' Bachelors', ' Doctorate', ' HS-grad', ' Masters', ' Preschool', ' Prof-school', ' Some-college'],
  'education-num': ['#na#', np.float32(1.0), np.float32(2.0), np.float32(3.0), np.float32(4.0), np.float32(5.0), np.float32(6.0), np.float32(7.0), np.float32(8.0), np.float32(9.0), np.float32(10.0), np.float32(11.0), np.float32(12.0), np.float32(13.0), np.float32(14.0), np.float32(15.0), np.float32(16.0)],
  'marital-status': ['#na#', ' Divorced', ' Married-AF-spouse', ' Married-civ-spouse', ' Married-spouse-absent', ' Never-married', ' Separated', ' Widowed'],
  'occupation': ['#na#', ' ?', ' Adm-clerical', ' Armed-Forces', ' Craft-repair', ' Exec-managerial', ' Farming-fishing', ' Handlers-cleaners', ' Machine-op-inspct', ' Other-service', ' Priv-house-serv', ' Prof-specialty', ' Protective-serv', ' Sales', ' Tech-support', ' Transport-moving'],
  'relationship': ['#na#', ' Husband', ' Not-in-family', ' Other-relative', ' Own-child', ' Unmarried', ' Wife'],
  'race': ['#na#', ' Amer-Indian-Eskimo', ' Asian-Pac-Islander', ' Black', ' Other', ' White'],
  'sex': ['#na#', ' Female', ' Male'],
  'capital-gain': ['#na#', np.int32(0), np.int32(114), np.int32(401), np.int32(594), np.int32(914), np.int32(991), np.int32(1055), np.int32(1086), np.int32(1111), np.int32(1151), np.int32(1173), np.int32(1409), np.int32(1424), np.int32(1455), np.int32(1471), np.int32(1506), np.int32(1639), np.int32(1797), np.int32(1831), np.int32(1848), np.int32(2009), np.int32(2036), np.int32(2050), np.int32(2062), np.int32(2105), np.int32(2174), np.int32(2176), np.int32(2202), np.int32(2228), np.int32(2290), np.int32(2329), np.int32(2346), np.int32(2354), np.int32(2387), np.int32(2407), np.int32(2414), np.int32(2463), np.int32(2538), np.int32(2580), np.int32(2597), np.int32(2635), np.int32(2653), np.int32(2829), np.int32(2885), np.int32(2907), np.int32(2936), np.int32(2961), np.int32(2964), np.int32(2977), np.int32(2993), np.int32(3103), np.int32(3137), np.int32(3273), np.int32(3325), np.int32(3411), np.int32(3418), np.int32(3432), np.int32(3456), np.int32(3464), np.int32(3471), np.int32(3674), np.int32(3781), np.int32(3818), np.int32(3887), np.int32(3908), np.int32(3942), np.int32(4064), np.int32(4101), np.int32(4386), np.int32(4416), np.int32(4508), np.int32(4650), np.int32(4687), np.int32(4787), np.int32(4865), np.int32(4931), np.int32(4934), np.int32(5013), np.int32(5060), np.int32(5178), np.int32(5455), np.int32(5556), np.int32(5721), np.int32(6097), np.int32(6360), np.int32(6418), np.int32(6497), np.int32(6514), np.int32(6723), np.int32(6767), np.int32(6849), np.int32(7298), np.int32(7430), np.int32(7443), np.int32(7688), np.int32(7896), np.int32(7978), np.int32(8614), np.int32(9386), np.int32(9562), np.int32(10520), np.int32(10566), np.int32(10605), np.int32(11678), np.int32(13550), np.int32(14084), np.int32(14344), np.int32(15020), np.int32(15024), np.int32(15831), np.int32(18481), np.int32(20051), np.int32(22040), np.int32(25124), np.int32(25236), np.int32(27828), np.int32(34095), np.int32(41310), np.int32(99999)],
  'capital-loss': ['#na#', np.int16(0), np.int16(155), np.int16(213), np.int16(323), np.int16(419), np.int16(625), np.int16(653), np.int16(810), np.int16(880), np.int16(974), np.int16(1092), np.int16(1138), np.int16(1258), np.int16(1340), np.int16(1380), np.int16(1408), np.int16(1411), np.int16(1485), np.int16(1504), np.int16(1539), np.int16(1564), np.int16(1573), np.int16(1579), np.int16(1590), np.int16(1594), np.int16(1602), np.int16(1617), np.int16(1628), np.int16(1648), np.int16(1651), np.int16(1668), np.int16(1669), np.int16(1672), np.int16(1719), np.int16(1721), np.int16(1726), np.int16(1735), np.int16(1740), np.int16(1741), np.int16(1755), np.int16(1762), np.int16(1816), np.int16(1825), np.int16(1844), np.int16(1848), np.int16(1876), np.int16(1887), np.int16(1902), np.int16(1944), np.int16(1974), np.int16(1977), np.int16(1980), np.int16(2001), np.int16(2002), np.int16(2042), np.int16(2051), np.int16(2057), np.int16(2080), np.int16(2129), np.int16(2149), np.int16(2163), np.int16(2174), np.int16(2179), np.int16(2201), np.int16(2205), np.int16(2206), np.int16(2231), np.int16(2238), np.int16(2246), np.int16(2258), np.int16(2267), np.int16(2282), np.int16(2339), np.int16(2352), np.int16(2377), np.int16(2392), np.int16(2415), np.int16(2444), np.int16(2457), np.int16(2467), np.int16(2472), np.int16(2489), np.int16(2547), np.int16(2559), np.int16(2603), np.int16(2754), np.int16(2824), np.int16(3004), np.int16(3683), np.int16(3770), np.int16(3900), np.int16(4356)],
  'native-country': ['#na#', ' ?', ' Cambodia', ' Canada', ' China', ' Columbia', ' Cuba', ' Dominican-Republic', ' Ecuador', ' El-Salvador', ' England', ' France', ' Germany', ' Greece', ' Guatemala', ' Haiti', ' Holand-Netherlands', ' Honduras', ' Hong', ' Hungary', ' India', ' Iran', ' Ireland', ' Italy', ' Jamaica', ' Japan', ' Laos', ' Mexico', ' Nicaragua', ' Outlying-US(Guam-USVI-etc)', ' Peru', ' Philippines', ' Poland', ' Portugal', ' Puerto-Rico', ' Scotland', ' South', ' Taiwan', ' Thailand', ' Trinadad&Tobago', ' United-States', ' Vietnam', ' Yugoslavia']},
 {'age': np.float64(38.58164675532078),
  'fnlwgt': np.float64(189778.36651208502),
  'hours-per-week': np.float64(40.437455852092995)},
 {'age': np.float64(13.640223192304274),
  'fnlwgt': np.float64(105548.3568809908),
  'hours-per-week': np.float64(12.347239175707989)})