Skip to content

docetl.cli

docetl.cli.run(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'))

Run the configuration specified in the YAML file.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `yaml_file` | `Path` | Path to the YAML file containing the pipeline configuration. | `Argument(..., help='Path to the YAML file containing the pipeline configuration')` |
| `max_threads` | `Optional[int]` | Maximum number of threads to use for running operations. | `Option(None, help='Maximum number of threads to use for running operations')` |
Source code in `docetl/cli.py` (lines 50–67):
@app.command()
def run(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    max_threads: Optional[int] = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
):
    """
    Run the configuration specified in the YAML file.

    Args:
        yaml_file (Path): Path to the YAML file containing the pipeline configuration.
        max_threads (Optional[int]): Maximum number of threads to use for running operations.
    """
    # NOTE(review): assuming `app` is a typer.Typer instance (defined outside
    # this view), the docstring above doubles as the command's --help text,
    # so its wording is user-facing.

    # The runner factory is handed the path as a plain string, not a Path.
    config_path = str(yaml_file)

    # Build the runner from the YAML config and execute it right away.
    DSLRunner.from_yaml(config_path, max_threads=max_threads).run()

docetl.cli.build(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'), model=typer.Option('gpt-4o', help='Model to use for optimization'), resume=typer.Option(False, help='Resume optimization from a previous build that may have failed'), timeout=typer.Option(60, help='Timeout for optimization operations in seconds'))

Build and optimize the configuration specified in the YAML file.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `yaml_file` | `Path` | Path to the YAML file containing the pipeline configuration. | `Argument(..., help='Path to the YAML file containing the pipeline configuration')` |
| `max_threads` | `Optional[int]` | Maximum number of threads to use for running operations. | `Option(None, help='Maximum number of threads to use for running operations')` |
| `model` | `str` | Model to use for optimization. Defaults to "gpt-4o". | `Option('gpt-4o', help='Model to use for optimization')` |
| `resume` | `bool` | Whether to resume optimization from a previous run. Defaults to False. | `Option(False, help='Resume optimization from a previous build that may have failed')` |
| `timeout` | `int` | Timeout for optimization operations in seconds. Defaults to 60. | `Option(60, help='Timeout for optimization operations in seconds')` |
Source code in `docetl/cli.py` (lines 13–47):
@app.command()
def build(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    max_threads: Optional[int] = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
    model: str = typer.Option("gpt-4o", help="Model to use for optimization"),
    resume: bool = typer.Option(
        False, help="Resume optimization from a previous build that may have failed"
    ),
    timeout: int = typer.Option(
        60, help="Timeout for optimization operations in seconds"
    ),
):
    """
    Build and optimize the configuration specified in the YAML file.

    Args:
        yaml_file (Path): Path to the YAML file containing the pipeline configuration.
        max_threads (Optional[int]): Maximum number of threads to use for running operations.
        model (str): Model to use for optimization. Defaults to "gpt-4o".
        resume (bool): Whether to resume optimization from a previous run. Defaults to False.
        timeout (int): Timeout for optimization operations in seconds. Defaults to 60.
    """
    # NOTE(review): assuming `app` is a typer.Typer instance (defined outside
    # this view), the docstring above doubles as the command's --help text,
    # so its wording is user-facing.

    # Collect the CLI options that are forwarded verbatim to the optimizer.
    optimizer_options = {
        "max_threads": max_threads,
        "model": model,
        "timeout": timeout,
        "resume": resume,
    }

    # The factory is handed the path as a plain string, not a Path.
    optimizer = Optimizer.from_yaml(str(yaml_file), **optimizer_options)

    # Optimize first, then save the optimized configuration.
    optimizer.optimize()
    optimizer.save_optimized_config()

docetl.cli.clear_cache()

Clear the LLM cache stored on disk.

Source code in `docetl/cli.py` (lines 70–75):
@app.command()
def clear_cache():
    """
    Clear the LLM cache stored on disk.
    """
    # `cc` is defined/imported outside this view; presumably it is the
    # project's cache-clearing helper under a short alias — verify against
    # the module's imports. This command takes no options and simply
    # delegates to it.
    cc()