Skip to content

docetl.cli

docetl.cli.run(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'))

Run the configuration specified in the YAML file.

Parameters:

Name Type Description Default
yaml_file Path

Path to the YAML file containing the pipeline configuration.

Argument(..., help='Path to the YAML file containing the pipeline configuration')
max_threads Optional[int]

Maximum number of threads to use for running operations.

Option(None, help='Maximum number of threads to use for running operations')
Source code in docetl/cli.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
@app.command()
def run(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    max_threads: Optional[int] = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
):
    """
    Run the configuration specified in the YAML file.

    Args:
        yaml_file (Path): Path to the YAML file containing the pipeline configuration.
        max_threads (Optional[int]): Maximum number of threads to use for running operations.
    """
    # NOTE: the docstring above is kept verbatim on purpose -- Typer surfaces a
    # command's docstring as its --help text, so it is user-facing output.

    # Look for a .env file in the directory the command was invoked from
    # (not the package directory) and load it only when it actually exists.
    dotenv_path = os.path.join(os.getcwd(), ".env")
    if os.path.exists(dotenv_path):
        load_dotenv(dotenv_path)

    # from_yaml receives a plain string path, hence the explicit str() on the
    # Path argument; the resulting runner is used once, so it is not named.
    DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads).load_run_save()

docetl.cli.build(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'), model=typer.Option('gpt-4o', help='Model to use for optimization'), resume=typer.Option(False, help='Resume optimization from a previous build that may have failed'), timeout=typer.Option(60, help='Timeout for optimization operations in seconds'))

Build and optimize the configuration specified in the YAML file.

Parameters:

Name Type Description Default
yaml_file Path

Path to the YAML file containing the pipeline configuration.

Argument(..., help='Path to the YAML file containing the pipeline configuration')
max_threads Optional[int]

Maximum number of threads to use for running operations.

Option(None, help='Maximum number of threads to use for running operations')
model str

Model to use for optimization. Defaults to "gpt-4o".

Option('gpt-4o', help='Model to use for optimization')
resume bool

Whether to resume optimization from a previous run. Defaults to False.

Option(False, help='Resume optimization from a previous build that may have failed')
timeout int

Timeout for optimization operations in seconds. Defaults to 60.

Option(60, help='Timeout for optimization operations in seconds')
Source code in docetl/cli.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
@app.command()
def build(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    max_threads: Optional[int] = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
    model: str = typer.Option("gpt-4o", help="Model to use for optimization"),
    resume: bool = typer.Option(
        False, help="Resume optimization from a previous build that may have failed"
    ),
    timeout: int = typer.Option(
        60, help="Timeout for optimization operations in seconds"
    ),
):
    """
    Build and optimize the configuration specified in the YAML file.

    Args:
        yaml_file (Path): Path to the YAML file containing the pipeline configuration.
        max_threads (Optional[int]): Maximum number of threads to use for running operations.
        model (str): Model to use for optimization. Defaults to "gpt-4o".
        resume (bool): Whether to resume optimization from a previous run. Defaults to False.
        timeout (int): Timeout for optimization operations in seconds. Defaults to 60.
    """
    # NOTE: the docstring above is kept verbatim on purpose -- Typer surfaces a
    # command's docstring as its --help text, so it is user-facing output.

    # Look for a .env file in the directory the command was invoked from
    # (not the package directory) and load it only when it actually exists.
    dotenv_path = os.path.join(os.getcwd(), ".env")
    if os.path.exists(dotenv_path):
        load_dotenv(dotenv_path)

    # from_yaml receives a plain string path, hence the explicit str() on the
    # Path argument.
    pipeline_runner = DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads)

    # CLI options are forwarded unchanged; save/return_pipeline are fixed for
    # command-line use (save=True, and the pipeline is not returned).
    optimize_options = {
        "save": True,
        "return_pipeline": False,
        "model": model,
        "resume": resume,
        "timeout": timeout,
    }
    pipeline_runner.optimize(**optimize_options)

docetl.cli.clear_cache()

Clear the LLM cache stored on disk.

Source code in docetl/cli.py
83
84
85
86
87
88
@app.command()
def clear_cache():
    """
    Clear the LLM cache stored on disk.
    """
    # `cc` is defined outside this excerpt -- presumably an imported
    # cache-clearing helper; confirm against the module's imports. This
    # command takes no arguments and simply delegates to it.
    cc()