Skip to content

CLI Interface

docetl.cli.run(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'))

Run the configuration specified in the YAML file.

Parameters:

Name Type Description Default
yaml_file Path

Path to the YAML file containing the pipeline configuration.

Argument(..., help='Path to the YAML file containing the pipeline configuration')
max_threads Optional[int]

Maximum number of threads to use for running operations.

Option(None, help='Maximum number of threads to use for running operations')
Source code in docetl/cli.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
@app.command()
def run(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    max_threads: Optional[int] = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
):
    """
    Run the configuration specified in the YAML file.

    Args:
        yaml_file (Path): Path to the YAML file containing the pipeline configuration.
        max_threads (Optional[int]): Maximum number of threads to use for running operations.
    """
    # Look for a .env file in the directory the command was invoked from,
    # and load it only when it is actually present.
    dotenv_path = os.path.join(os.getcwd(), ".env")
    if os.path.exists(dotenv_path):
        load_dotenv(dotenv_path)

    # Build the runner from the YAML config and execute the pipeline end to end.
    DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads).load_run_save()

docetl.cli.build(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'), resume=typer.Option(False, help='Resume optimization from a previous build that may have failed'), save_path=typer.Option(None, help='Path to save the optimized pipeline configuration'))

Build and optimize the configuration specified in the YAML file. Any arguments passed here will override the values in the YAML file.

Parameters:

Name Type Description Default
yaml_file Path

Path to the YAML file containing the pipeline configuration.

Argument(..., help='Path to the YAML file containing the pipeline configuration')
max_threads Optional[int]

Maximum number of threads to use for running operations.

Option(None, help='Maximum number of threads to use for running operations')
model str

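Model to use for optimization. Defaults to "gpt-4o". Note: `build` does not define a `model` parameter in its signature, so this option cannot currently be passed on the command line.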

required
resume bool

Whether to resume optimization from a previous run. Defaults to False.

Option(False, help='Resume optimization from a previous build that may have failed')
save_path Path

Path to save the optimized pipeline configuration.

Option(None, help='Path to save the optimized pipeline configuration')
Source code in docetl/cli.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
@app.command()
def build(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    max_threads: Optional[int] = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
    resume: bool = typer.Option(
        False, help="Resume optimization from a previous build that may have failed"
    ),
    save_path: Optional[Path] = typer.Option(
        None, help="Path to save the optimized pipeline configuration"
    ),
):
    """
    Build and optimize the configuration specified in the YAML file.
    Any arguments passed here will override the values in the YAML file.

    Args:
        yaml_file (Path): Path to the YAML file containing the pipeline configuration.
        max_threads (Optional[int]): Maximum number of threads to use for running operations.
        resume (bool): Whether to resume optimization from a previous run. Defaults to False.
        save_path (Optional[Path]): Path to save the optimized pipeline configuration. Defaults to None.
    """
    # Get the current working directory (where the user called the command)
    cwd = os.getcwd()

    # Load .env file from the current working directory, if one exists
    env_file = os.path.join(cwd, ".env")
    if os.path.exists(env_file):
        load_dotenv(env_file)

    runner = DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads)
    # save_path is forwarded as-is and is None when the option is omitted.
    runner.optimize(
        save=True,
        return_pipeline=False,
        resume=resume,
        save_path=save_path,
    )

docetl.cli.clear_cache()

Clear the LLM cache stored on disk.

Source code in docetl/cli.py
85
86
87
88
89
90
@app.command()
def clear_cache() -> None:
    """
    Clear the LLM cache stored on disk.
    """
    # `cc` is defined/imported elsewhere in docetl/cli.py (not visible here);
    # presumably the cache-clearing helper — confirm against the module imports.
    cc()