from typing import Any, Dict, List, Optional, Union
import hubble
from finetuner import DocumentArray
from finetuner.client import FinetunerV1Client
from finetuner.constants import CREATED_AT, DESCRIPTION, NAME, STATUS
from finetuner.data import CSVOptions
from finetuner.excepts import FinetunerServerError
from finetuner.experiment import Experiment
from finetuner.model import SynthesisModels
from finetuner.run import Run
from hubble import login_required
class Finetuner:
    """Entry point for managing experiments and runs through a Finetuner client.

    The instance holds a :class:`FinetunerV1Client` and a default
    :class:`Experiment`. Both stay ``None`` until :meth:`_init_state` has run,
    which :meth:`login` arranges by passing it to ``hubble.login`` as the
    ``post_success`` callback.
    """

    def __init__(self):
        # Populated by `_init_state`; `None` means the state was never initialized.
        self._client = None
        self._default_experiment = None
        self._default_experiment_name = 'default'

    def login(self, force: bool = False, interactive: Optional[bool] = None):
        """Login to Hubble account, initialize a client object
        and create a default experiment.

        :param force: If set to true, overwrite token and re-login.
        :param interactive: Forwarded unchanged to `hubble.login`.

        .. note:: Calling `login` is necessary for using finetuner.
        """
        hubble.login(
            force=force, post_success=self._init_state, interactive=interactive
        )

    @login_required
    def _init_state(self):
        """Initialize client and default experiment."""
        self._client = FinetunerV1Client()
        self._default_experiment = self._get_default_experiment()

    def _get_default_experiment(self) -> Experiment:
        """Retrieve the experiment named `self._default_experiment_name`, creating
        it when it is not among the listed experiments.

        :return: The default `Experiment` object.
        """
        # Only the first page of `list_experiments()` is scanned; anything missed
        # here is still picked up by `create_experiment`, which tries a `get`
        # before creating.
        for experiment in self.list_experiments():
            if experiment.name == self._default_experiment_name:
                return experiment
        return self.create_experiment(name=self._default_experiment_name)

    def _to_experiment(self, payload: Dict[str, Any]) -> Experiment:
        """Wrap an experiment mapping returned by the client in an `Experiment`.

        :param payload: Mapping that provides the `NAME`, `STATUS`, `CREATED_AT`
            and `DESCRIPTION` keys.
        :return: An `Experiment` object bound to `self._client`.
        """
        return Experiment(
            client=self._client,
            name=payload[NAME],
            status=payload[STATUS],
            created_at=payload[CREATED_AT],
            description=payload[DESCRIPTION],
        )

    def _resolve_experiment(
        self, experiment_name: Optional[str], action: str = 'start finetuning run'
    ) -> Experiment:
        """Pick the experiment an operation should act on.

        :param experiment_name: Name of the experiment. If empty or `None`, the
            default experiment is used.
        :param action: Short description of the operation, used in the error
            message.
        :return: An `Experiment` object.
        :raises ValueError: If no experiment is available, i.e. the default
            experiment was never initialized.
        """
        if experiment_name:
            experiment = self.get_experiment(name=experiment_name)
        else:
            experiment = self._default_experiment
        if experiment is None:
            raise ValueError(
                f'Unable to {action} as experiment is `None`. '
                'Make sure you have logged in using `finetuner.login(force=True)`.'
            )
        return experiment

    @login_required
    def create_experiment(self, name: str = 'default') -> Experiment:
        """Create an experiment, or return the existing one with the same name.

        :param name: Name of the experiment. Defaults to `'default'`.
        :return: An `Experiment` object.
        """
        # Get-or-create: a server error on lookup is treated as "does not exist".
        try:
            experiment = self._client.get_experiment(name=name)
        except FinetunerServerError:
            experiment = self._client.create_experiment(name=name)
        return self._to_experiment(experiment)

    @login_required
    def get_experiment(self, name: str) -> Experiment:
        """Get an experiment by its name.

        :param name: Name of the experiment.
        :return: An `Experiment` object.
        """
        return self._to_experiment(self._client.get_experiment(name=name))

    @login_required
    def list_experiments(self, page: int = 1, size: int = 50) -> List[Experiment]:
        """List experiments, one page at a time.

        :param page: The page index.
        :param size: The number of experiments to retrieve.
        :return: A list of :class:`Experiment` instance.

        .. note:: `page` and `size` works together. For example, page 1 size 50
            gives the 50 experiments in the first page. To get 50-100, set `page`
            as 2.
        .. note:: The maximum number for `size` per page is 100.
        """
        experiments = self._client.list_experiments(page=page, size=size)['items']
        return [self._to_experiment(experiment) for experiment in experiments]

    @login_required
    def delete_experiment(self, name: str) -> Experiment:
        """Delete an experiment by its name.

        :param name: Name of the experiment.
        :return: Deleted experiment.
        """
        return self._to_experiment(self._client.delete_experiment(name=name))

    @login_required
    def delete_experiments(self) -> List[Experiment]:
        """Delete every experiment.

        :return: List of deleted experiments.
        """
        experiments = self._client.delete_experiments()
        return [self._to_experiment(experiment) for experiment in experiments]

    @login_required
    def create_training_run(
        self,
        model: str,
        train_data: Union[str, DocumentArray],
        eval_data: Optional[Union[str, DocumentArray]] = None,
        val_split: float = 0.0,
        model_artifact: Optional[str] = None,
        run_name: Optional[str] = None,
        description: Optional[str] = None,
        experiment_name: Optional[str] = None,
        model_options: Optional[Dict[str, Any]] = None,
        loss: str = 'TripletMarginLoss',
        miner: Optional[str] = None,
        miner_options: Optional[Dict[str, Any]] = None,
        optimizer: str = 'Adam',
        optimizer_options: Optional[Dict[str, Any]] = None,
        learning_rate: Optional[float] = None,
        epochs: int = 5,
        batch_size: Optional[int] = None,
        callbacks: Optional[List[Any]] = None,
        scheduler: Optional[str] = None,
        scheduler_options: Optional[Dict[str, Any]] = None,
        freeze: bool = False,
        output_dim: Optional[int] = None,
        device: str = 'cuda',
        num_workers: int = 4,
        to_onnx: bool = False,
        csv_options: Optional[CSVOptions] = None,
        public: bool = False,
        num_items_per_class: int = 4,
        sampler: str = 'auto',
        loss_optimizer: Optional[str] = None,
        loss_optimizer_options: Optional[Dict[str, Any]] = None,
    ) -> Run:
        """Create a training run.

        If an experiment name is not specified, the run will be created in the
        default experiment. Every argument other than `experiment_name` is
        forwarded to `Experiment.create_training_run`; `model_options` and
        `callbacks` are replaced by an empty dict / list when falsy.

        :param experiment_name: Optional name of the experiment.
        :return: A `Run` object.
        :raises ValueError: If no experiment name is given and the default
            experiment has not been initialized.
        """
        experiment = self._resolve_experiment(
            experiment_name, action='start finetuning run'
        )
        return experiment.create_training_run(
            model=model,
            train_data=train_data,
            eval_data=eval_data,
            val_split=val_split,
            model_artifact=model_artifact,
            run_name=run_name,
            description=description,
            model_options=model_options or {},
            loss=loss,
            miner=miner,
            miner_options=miner_options,
            optimizer=optimizer,
            optimizer_options=optimizer_options,
            learning_rate=learning_rate,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks or [],
            scheduler=scheduler,
            scheduler_options=scheduler_options,
            freeze=freeze,
            output_dim=output_dim,
            device=device,
            num_workers=num_workers,
            to_onnx=to_onnx,
            csv_options=csv_options,
            public=public,
            num_items_per_class=num_items_per_class,
            sampler=sampler,
            loss_optimizer=loss_optimizer,
            loss_optimizer_options=loss_optimizer_options,
        )

    @login_required
    def create_synthesis_run(
        self,
        query_data: Union[str, List[str], DocumentArray],
        corpus_data: Union[str, List[str], DocumentArray],
        models: SynthesisModels,
        num_relations: int = 10,
        run_name: Optional[str] = None,
        description: Optional[str] = None,
        experiment_name: Optional[str] = None,
        device: str = 'cuda',
        num_workers: int = 4,
        csv_options: Optional[CSVOptions] = None,
        public: bool = False,
    ) -> Run:
        """Create a synthesis run.

        If an experiment name is not specified, the run will be created in the
        default experiment. Every argument other than `experiment_name` is
        forwarded unchanged to `Experiment.create_synthesis_run`.

        :param experiment_name: Optional name of the experiment.
        :return: A `Run` object.
        :raises ValueError: If no experiment name is given and the default
            experiment has not been initialized.
        """
        # The error text is kept identical to the training-run variant so that
        # existing callers see the same message as before.
        experiment = self._resolve_experiment(
            experiment_name, action='start finetuning run'
        )
        return experiment.create_synthesis_run(
            query_data=query_data,
            corpus_data=corpus_data,
            models=models,
            num_relations=num_relations,
            run_name=run_name,
            description=description,
            device=device,
            num_workers=num_workers,
            csv_options=csv_options,
            public=public,
        )

    @login_required
    def get_run(self, run_name: str, experiment_name: Optional[str] = None) -> Run:
        """Get run by its name and (optional) experiment.

        If an experiment name is not specified, we'll look for the run in the
        default experiment.

        :param run_name: Name of the run.
        :param experiment_name: Optional name of the experiment.
        :return: A `Run` object.
        :raises ValueError: If no experiment name is given and the default
            experiment has not been initialized.
        """
        experiment = self._resolve_experiment(experiment_name, action='get run')
        return experiment.get_run(name=run_name)

    @login_required
    def list_runs(
        self, experiment_name: Optional[str] = None, page: int = 1, size: int = 50
    ) -> List[Run]:
        """List created runs inside a given experiment.

        If no experiment is specified, runs are collected from every experiment
        returned by `list_experiments()` with its default paging (first page
        only), and `page` / `size` are applied to each experiment separately.

        :param experiment_name: The name of the experiment.
        :param page: The page index.
        :param size: Number of runs to retrieve.
        :return: List of runs.

        .. note:: `page` and `size` works together. For example, page 1 size 50
            gives the 50 runs in the first page. To get 50-100, set `page` as 2.
        .. note:: The maximum number for `size` per page is 100.
        """
        if experiment_name:
            experiments = [self.get_experiment(name=experiment_name)]
        else:
            experiments = self.list_experiments()
        runs = []
        for experiment in experiments:
            runs.extend(experiment.list_runs(page=page, size=size))
        return runs

    @login_required
    def delete_run(self, run_name: str, experiment_name: Optional[str] = None):
        """Delete a run.

        If an experiment name is not specified, we'll look for the run in the
        default experiment.

        :param run_name: Name of the run.
        :param experiment_name: Optional name of the experiment.
        :raises ValueError: If no experiment name is given and the default
            experiment has not been initialized.
        """
        experiment = self._resolve_experiment(experiment_name, action='delete run')
        experiment.delete_run(name=run_name)

    @login_required
    def delete_runs(self, experiment_name: Optional[str] = None):
        """Delete every run of one experiment, or of the listed experiments.

        If an experiment name is not specified, runs are deleted in every
        experiment returned by `list_experiments()` with its default paging
        (first page only).

        :param experiment_name: Optional name of the experiment.
        """
        if experiment_name:
            experiments = [self.get_experiment(name=experiment_name)]
        else:
            experiments = self.list_experiments()
        for experiment in experiments:
            experiment.delete_runs()

    @login_required
    def get_token(self) -> str:
        """Return the value of `hubble.Auth.get_auth_token()`.

        :return: The auth token.
        """
        return hubble.Auth.get_auth_token()