Optuna

A hyperparameter optimization framework

Optuna is an automatic hyperparameter optimization software framework, particularly designed for machine learning. It features an imperative, define-by-run style user API. Thanks to our define-by-run API, the code written with Optuna enjoys high modularity, and the user of Optuna can dynamically construct the search spaces for the hyperparameters.


source

run_optuna_study


def run_optuna_study(
    objective, resume:NoneType=None, study_type:NoneType=None, multivariate:bool=True, search_space:NoneType=None,
    evaluate:NoneType=None, seed:NoneType=None, sampler:NoneType=None, pruner:NoneType=None,
    study_name:NoneType=None, direction:str='maximize', n_trials:NoneType=None, timeout:NoneType=None,
    gc_after_trial:bool=False, show_progress_bar:bool=True, save_study:bool=True, path:str='optuna',
    show_plots:bool=True
):

Creates and runs an optuna study.

Args: objective: A callable that implements objective function. resume: Path to a previously saved study. study_type: Type of study selected (bayesian, gridsearch, randomsearch). Based on this a sampler will be built if sampler is None. If a sampler is passed, this has no effect. multivariate: If this is True, the multivariate TPE is used when suggesting parameters. The multivariate TPE is reported to outperform the independent TPE. search_space: Search space required when running a gridsearch (if you don’t pass a sampler). evaluate: Allows you to pass a specific set of hyperparameters that will be evaluated. seed: Fixed seed used by samplers. sampler: A sampler object that implements background algorithm for value suggestion. If None is specified, TPESampler is used during single-objective optimization and NSGAIISampler during multi-objective optimization. See also samplers. pruner: A pruner object that decides early stopping of unpromising trials. If None is specified, MedianPruner is used as the default. See also pruners. study_name: Study’s name. If this argument is set to None, a unique name is generated automatically. direction: Direction for single-objective optimization, or a sequence of directions during multi-objective optimization. n_trials: The number of trials. If this argument is set to None, there is no limitation on the number of trials. If timeout is also set to None, the study continues to create trials until it receives a termination signal such as Ctrl+C or SIGTERM. timeout: Stop study after the given number of second(s). If this argument is set to None, the study is executed without time limitation. If n_trials is also set to None, the study continues to create trials until it receives a termination signal such as Ctrl+C or SIGTERM. gc_after_trial: Flag to execute garbage collection at the end of each trial. By default (False), no garbage collection is forced after a trial. 
You can turn it on with this argument if memory is not safely managed in your objective function. show_progress_bar: Flag to show progress bars or not. To disable the progress bar, set this to False. save_study: Save your study when finished/interrupted. path: Folder where the study will be saved. show_plots: Flag to control whether plots are shown at the end of the study.

Exported source
def run_optuna_study(objective, resume=None, study_type=None, multivariate=True, search_space=None, evaluate=None, seed=None, sampler=None, pruner=None,
                     study_name=None, direction='maximize', n_trials=None, timeout=None, gc_after_trial=False, show_progress_bar=True,
                     save_study=True, path='optuna', show_plots=True):
    r"""Creates and runs an optuna study.

    Args: 
        objective:          A callable that implements objective function.
        resume:             Path to a previously saved study. When given, the study is loaded with joblib and the sampler, pruner, 
                            study_name and direction arguments are not applied to it.
        study_type:         Type of study selected (bayesian, gridsearch, randomsearch). Based on this a sampler will be built if sampler is None. 
                            If a sampler is passed, this has no effect.
        multivariate:       If this is True, the multivariate TPE is used when suggesting parameters. The multivariate TPE is reported to outperform 
                            the independent TPE.
        search_space:       Search space required when running a gridsearch (if you don't pass a sampler).
        evaluate:           Allows you to pass a specific set of hyperparameters that will be evaluated.
        seed:               Fixed seed used by samplers.
        sampler:            A sampler object that implements background algorithm for value suggestion. If None is specified, a sampler is built 
                            from study_type (TPESampler when study_type is also None).
        pruner:             A pruner object that decides early stopping of unpromising trials. If None is specified, MedianPruner is used as the default. 
                            See also pruners.
        study_name:         Study’s name. If this argument is set to None, a unique name is generated automatically.
        direction:          Direction for single-objective optimization, or a sequence of directions during multi-objective optimization.
        n_trials:           The number of trials. If this argument is set to None, there is no limitation on the number of trials. If timeout is also set to 
                            None, the study continues to create trials until it receives a termination signal such as Ctrl+C or SIGTERM.
        timeout:            Stop study after the given number of second(s). If this argument is set to None, the study is executed without time limitation. 
                            If n_trials is also set to None, the study continues to create trials until it receives a termination signal such as 
                            Ctrl+C or SIGTERM.
        gc_after_trial:     Flag to execute garbage collection at the end of each trial. Defaults to False (no forced garbage collection). 
                            Set it to True if memory is not safely managed in your objective function.
        show_progress_bar:  Flag to show progress bars or not. To disable progress bar, set this False.
        save_study:         Save your study when finished/ interrupted.
        path:               Folder where the study will be saved.
        show_plots:         Flag to control whether plots are shown at the end of the study.

    Returns:
        The study object, or None when `resume` was given but the saved study could not be loaded.

    Raises:
        ImportError:        optuna is not installed.
        AssertionError:     no sampler could be determined (unknown study_type), or a gridsearch was requested without search_space.
    """

    try: import optuna
    except ImportError: raise ImportError('You need to install optuna to use run_optuna_study')

    # A non-string sequence of directions means a multi-objective study.
    is_multi_objective = isinstance(direction, Sequence) and not isinstance(direction, (str, bytes))

    def _is_multi_objective_study(study):
        # Inspect the study itself so resumed studies are handled regardless of `direction`.
        return len(getattr(study, 'directions', [])) > 1

    def _print_best_study_results(study, header):
        # Multi-objective studies expose a list of best trials instead of a single best trial.
        if _is_multi_objective_study(study):
            trials = study.best_trials
            if not trials: raise ValueError('No finished trials yet.')
            print(header.replace('trial', 'trials'))
            for trial in trials:
                print(f"  Trial {trial.number}:")
                print(f"    values          : {trial.values}")
                print(f"    best_params = {trial.params}")
        else:
            trial = study.best_trial
            print(header)
            print(" Value: ", trial.value)
            print(" Params: ")
            for key, value in trial.params.items():
                print(f"    {key}: {value}")

    # Sampler
    # AssertionError is kept for backward compatibility, but raised explicitly so the
    # validation is not stripped when Python runs with -O.
    if sampler is None:
        requested = "bayes" if study_type is None else study_type.lower()
        if "bayes" in requested:
            sampler = optuna.samplers.TPESampler(seed=seed, multivariate=multivariate)
        elif "grid" in requested:
            if not search_space:
                raise AssertionError("you need to pass a search_space dict to run a gridsearch")
            sampler = optuna.samplers.GridSampler(search_space)
        elif "random" in requested:
            sampler = optuna.samplers.RandomSampler(seed=seed)
    if not sampler:
        raise AssertionError("you need to either select a study type (bayesian, gridsampler, randomsampler) or pass a sampler")

    # Study
    if resume:
        # NOTE: joblib.load unpickles the file; only resume from files you trust.
        try:
            study = joblib.load(resume)
        except Exception:
            print(f"joblib.load({resume}) couldn't recover any saved study. Check the path.")
            return
        # Reporting previous results is best-effort: a study without finished trials is still resumed.
        try:
            _print_best_study_results(study, "Best trial until now:")
        except Exception:
            print("No finished trials yet.")
    else:
        study_kwargs = dict(sampler=sampler, pruner=pruner, study_name=study_name)
        # optuna takes `directions` for multi-objective studies and `direction` otherwise.
        if is_multi_objective: study_kwargs['directions'] = direction
        else: study_kwargs['direction'] = direction
        study = optuna.create_study(**study_kwargs)
    if evaluate: study.enqueue_trial(evaluate)
    try:
        study.optimize(objective, n_trials=n_trials, timeout=timeout, gc_after_trial=gc_after_trial, show_progress_bar=show_progress_bar)
    except KeyboardInterrupt:
        # Deliberate: a manual interrupt stops the search but the study is still saved and reported.
        pass

    # Save
    if save_study:
        full_path = Path(path)/f'{study.study_name}.pkl'
        full_path.parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(study, full_path)
        print(f'\nOptuna study saved to {full_path}')
        print(f"To reload the study run: study = joblib.load('{full_path}')")

    # Plots
    if show_plots and len(study.trials) > 1:
        # Plotting is best-effort: any plot that cannot be rendered is skipped.
        for plot_name in ('plot_optimization_history', 'plot_param_importances', 'plot_slice', 'plot_parallel_coordinate'):
            try: display(getattr(optuna.visualization, plot_name)(study))
            except Exception: pass

    # Study stats
    try:
        trial_states = optuna.trial.TrialState
        pruned_trials = [t for t in study.trials if t.state == trial_states.PRUNED]
        complete_trials = [t for t in study.trials if t.state == trial_states.COMPLETE]
        print(f"\nStudy statistics    : ")
        print(f"  Study name        : {study.study_name}")
        print(f"  # finished trials : {len(study.trials)}")
        print(f"  # pruned trials   : {len(pruned_trials)}")
        print(f"  # complete trials : {len(complete_trials)}")

        _print_best_study_results(study, "\nBest trial          :")
    except Exception:
        print('\nNo finished trials yet.')
    return study
import contextlib, io, sys
from types import ModuleType, SimpleNamespace

class _FakeOptunaTrial:
    """Bare-bones trial record exposing only the attributes the tests read."""

    def __init__(self, number, values=None, value=None, params=None, state='COMPLETE'):
        self.number = number
        self.values = values
        self.value = value
        # A falsy `params` (None or empty) is replaced by a fresh dict.
        self.params = params if params else {}
        self.state = state

class _FakeOptunaStudy:
    """In-memory study double that records which best-trial accessor was read.

    `kwargs` is the keyword dict that would be given to `create_study`; only
    `study_name`, `direction` and `directions` are consulted.
    """

    def __init__(self, kwargs):
        self.study_name = kwargs.get('study_name') or 'fake-study'

        requested_directions = kwargs.get('directions')
        if requested_directions is None:
            self.directions = [kwargs.get('direction')]
        else:
            self.directions = list(requested_directions)

        # Seed the study with one finished trial shaped like its objective count.
        if len(self.directions) > 1:
            seed_trial = _FakeOptunaTrial(0, values=[1., 2.], params={'x': 1})
        else:
            seed_trial = _FakeOptunaTrial(0, value=1., params={'x': 1})
        self._best_trials = [seed_trial]
        self.trials = self._best_trials  # same list object, as a single trial is both

        self.best_trial_accessed = False
        self.best_trials_accessed = False

    def enqueue_trial(self, evaluate):
        """Remember the enqueued parameters."""
        self.enqueued = evaluate

    def optimize(self, *args, **kwargs):
        """Record the call arguments without running anything."""
        self.optimize_args = (args, kwargs)

    @property
    def best_trial(self):
        """Single best trial; raises for studies with more than one direction."""
        self.best_trial_accessed = True
        if len(self.directions) > 1:
            raise RuntimeError('single best trial is unavailable')
        return self._best_trials[0]

    @property
    def best_trials(self):
        """List of best trials; flags that it was read."""
        self.best_trials_accessed = True
        return self._best_trials

class _FakeOptuna(ModuleType):
    """Module object named 'optuna' exposing just the API used by `run_optuna_study`."""

    def __init__(self):
        super().__init__('optuna')
        self.created_studies = []

        # Samplers are represented by (tag, arguments) tuples so tests can inspect them.
        def _tpe(**kwargs):
            return ('tpe', kwargs)

        def _grid(search_space):
            return ('grid', search_space)

        def _random(**kwargs):
            return ('random', kwargs)

        # Every plotting function accepts a study and produces nothing.
        def _no_plot(study):
            return None

        self.samplers = SimpleNamespace(TPESampler=_tpe, GridSampler=_grid, RandomSampler=_random)
        trial_states = SimpleNamespace(PRUNED='PRUNED', COMPLETE='COMPLETE')
        self.trial = SimpleNamespace(TrialState=trial_states)
        self.visualization = SimpleNamespace(
            plot_optimization_history=_no_plot,
            plot_param_importances=_no_plot,
            plot_slice=_no_plot,
            plot_parallel_coordinate=_no_plot,
        )

    def create_study(self, **kwargs):
        """Log the keyword arguments and hand back a fake study built from them."""
        self.created_studies.append(kwargs)
        return _FakeOptunaStudy(kwargs)

@contextlib.contextmanager
def _use_fake_optuna(fake):
    """Temporarily register `fake` as `sys.modules['optuna']`.

    Yields `fake`. On exit (including when the body raises) the previous
    `sys.modules['optuna']` entry is put back exactly as it was; if there was
    no entry, the key is removed again.
    """
    # A sentinel distinguishes "no entry" from an explicit None entry, which
    # Python treats as "importing this module is blocked" and must be restored.
    not_registered = object()
    previous = sys.modules.get('optuna', not_registered)
    sys.modules['optuna'] = fake
    try:
        yield fake
    finally:
        if previous is not_registered:
            sys.modules.pop('optuna', None)
        else:
            sys.modules['optuna'] = previous

# Scenario: a fresh multi-objective study.
fake = _FakeOptuna()
with _use_fake_optuna(fake), contextlib.redirect_stdout(io.StringIO()):
    study = run_optuna_study(
        lambda trial: (1., 2.),
        study_type='random',
        direction=['minimize', 'minimize'],
        n_trials=1,
        save_study=False,
        show_plots=False,
        show_progress_bar=False,
    )
created_study = fake.created_studies[0]
# Multi-objective Optuna studies must be created with directions= and reported with best_trials.
assert created_study.get('directions') == ['minimize', 'minimize']
assert 'direction' not in created_study
assert study.best_trials_accessed
assert not study.best_trial_accessed

# Scenario: resuming saved multi-objective studies. `joblib.load` is swapped for a
# stub for the duration of the checks and always restored afterwards.
old_joblib_load = joblib.load
try:
    # Resumed study that already holds a finished trial.
    resumed_study = _FakeOptunaStudy({'directions': ['minimize', 'minimize']})
    joblib.load = lambda resume: resumed_study
    fake = _FakeOptuna()
    with _use_fake_optuna(fake), contextlib.redirect_stdout(io.StringIO()):
        study = run_optuna_study(
            lambda trial: (1., 2.),
            resume='fake-study.pkl',
            n_trials=1,
            save_study=False,
            show_plots=False,
            show_progress_bar=False,
        )
    assert study is resumed_study
    assert study.best_trials_accessed
    assert not study.best_trial_accessed

    # Test resume with empty/no completed trials
    resumed_empty_study = _FakeOptunaStudy({'directions': ['minimize', 'minimize']})
    resumed_empty_study._best_trials, resumed_empty_study.trials = [], []
    joblib.load = lambda resume: resumed_empty_study
    fake = _FakeOptuna()
    with _use_fake_optuna(fake), contextlib.redirect_stdout(io.StringIO()) as trapped:
        study = run_optuna_study(
            lambda trial: (1., 2.),
            resume='fake-study-empty.pkl',
            n_trials=1,
            save_study=False,
            show_plots=False,
            show_progress_bar=False,
        )
    assert study is resumed_empty_study
    assert "No finished trials yet." in trapped.getvalue()
finally:
    joblib.load = old_joblib_load

# Scenario: a fresh single-objective study must be created with direction=
# and reported through best_trial.
fake = _FakeOptuna()
with _use_fake_optuna(fake), contextlib.redirect_stdout(io.StringIO()):
    study = run_optuna_study(
        lambda trial: 1.,
        study_type='random',
        direction='minimize',
        n_trials=1,
        save_study=False,
        show_plots=False,
        show_progress_bar=False,
    )
created_study = fake.created_studies[0]
assert created_study.get('direction') == 'minimize'
assert 'directions' not in created_study
assert study.best_trial_accessed