Skip to content

CLI Interface

docetl.cli.run(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'))

Run the configuration specified in the YAML file.

Parameters:

Name Type Description Default
yaml_file Path

Path to the YAML file containing the pipeline configuration.

Argument(..., help='Path to the YAML file containing the pipeline configuration')
max_threads int | None

Maximum number of threads to use for running operations.

Option(None, help='Maximum number of threads to use for running operations')
Source code in docetl/cli.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
@app.command()
def run(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    max_threads: int | None = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
):
    """
    Run the configuration specified in the YAML file.

    Args:
        yaml_file (Path): Path to the YAML file containing the pipeline configuration.
        max_threads (int | None): Maximum number of threads to use for running operations.
    """
    # The docstring above is left word-for-word: Typer surfaces it as the
    # command's --help text, so it is user-facing output, not just a comment.

    # Pick up a .env sitting in the directory the command was invoked from
    # (not the directory of the YAML file) before the runner is created.
    dotenv_path = os.path.join(os.getcwd(), ".env")
    if os.path.exists(dotenv_path):
        load_dotenv(dotenv_path)

    # Build the runner from the YAML path and execute the pipeline in one go.
    DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads).load_run_save()

docetl.cli.build(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'))

Optimize a pipeline using MOAR (Multi-Objective Agentic Rewrites).

Requires an optimizer_config section in the YAML with at least:
  • evaluation_file: path to a Python file with a @docetl.register_eval function
  • metric_key: key to extract from evaluation results

Models are auto-detected from API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, AZURE_API_KEY) unless available_models is set explicitly.

Parameters:

Name Type Description Default
yaml_file Path

Path to the YAML pipeline file.

Argument(..., help='Path to the YAML file containing the pipeline configuration')
max_threads int | None

Maximum number of threads for operations.

Option(None, help='Maximum number of threads to use for running operations')
Source code in docetl/cli.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
@app.command()
def build(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    max_threads: int | None = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
):
    """
    Optimize a pipeline using MOAR (Multi-Objective Agentic Rewrites).

    Requires an optimizer_config section in the YAML with at least:
      - evaluation_file: path to a Python file with a @docetl.register_eval function
      - metric_key: key to extract from evaluation results

    Models are auto-detected from API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY,
    GEMINI_API_KEY, AZURE_API_KEY) unless available_models is set explicitly.

    Args:
        yaml_file (Path): Path to the YAML pipeline file.
        max_threads (int | None): Maximum number of threads for operations.
    """
    # NOTE(review): `max_threads` is accepted by the CLI but is not forwarded
    # to anything in this function -- confirm whether MOAROptimizer should
    # receive it.

    # Load a .env from the directory the command was invoked from so API keys
    # are in the environment before the optimizer is constructed.
    cwd = os.getcwd()

    env_file = os.path.join(cwd, ".env")
    if os.path.exists(env_file):
        load_dotenv(env_file)

    import yaml as yaml_lib

    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # default encoding.
    with open(yaml_file, "r", encoding="utf-8") as f:
        loaded = yaml_lib.safe_load(f)

    # safe_load yields None for an empty document and can yield a list or a
    # scalar for a non-mapping document; treat all of those as "no config" so
    # the user gets the friendly panel below rather than an AttributeError.
    config = loaded if isinstance(loaded, dict) else {}

    optimizer_config = config.get("optimizer_config")
    # A missing, empty, or non-mapping section is reported the same way.
    if not optimizer_config or not isinstance(optimizer_config, dict):
        example_yaml = """optimizer_config:
  evaluation_file: evaluate.py
  metric_key: score
  save_dir: ./moar_results       # optional, defaults to temp dir
  max_iterations: 20              # optional, defaults to 20
  # available_models:             # optional, auto-detected from API keys
  #   - gpt-4.1
  #   - anthropic/claude-sonnet-4-6"""

        error_panel = Panel(
            f"[bold red]Error:[/bold red] optimizer_config section is required in YAML.\n\n"
            f"[bold]Example:[/bold]\n"
            f"[dim]{example_yaml}[/dim]",
            title="[bold red]Missing optimizer_config[/bold red]",
            border_style="red",
        )
        console.print(error_panel)
        raise typer.Exit(1)

    required_fields = {
        "evaluation_file": "Path to evaluation function file",
        "metric_key": "Key to extract from evaluation results",
    }

    # A field counts as missing when it is absent or falsy (e.g. empty string).
    missing_fields = [
        field for field in required_fields if not optimizer_config.get(field)
    ]
    if missing_fields:
        fields_table = Table(
            show_header=True, header_style="bold cyan", box=None, padding=(0, 2)
        )
        fields_table.add_column("Field", style="yellow")
        fields_table.add_column("Description", style="dim")

        # List every required field, highlighting the ones that are missing.
        for field, desc in required_fields.items():
            style = "bold red" if field in missing_fields else "dim"
            fields_table.add_row(f"[{style}]{field}[/{style}]", desc)

        missing_list = ", ".join(
            f"[bold red]{name}[/bold red]" for name in missing_fields
        )

        from rich.console import Group

        error_group = Group(
            f"[bold red]Missing required fields:[/bold red] {missing_list}\n",
            "[bold]Required fields:[/bold]",
            fields_table,
        )

        error_panel = Panel(
            error_group,
            title="[bold red]Missing Required Fields[/bold red]",
            border_style="red",
        )
        console.print(error_panel)
        raise typer.Exit(1)

    # Imported only once the configuration has been validated.
    from docetl.moar.optimizer import MOAROptimizer
    from docetl.utils_evaluation import load_custom_evaluate_func

    try:
        # Resolve dataset path for wrapping the eval function: prefer the
        # explicit optimizer_config value, otherwise fall back to the "path"
        # of the first entry under the top-level "datasets" mapping.
        dataset_path = optimizer_config.get("dataset_path")
        if not dataset_path:
            ds = config.get("datasets", {})
            if ds:
                ds_cfg = next(iter(ds.values()))
                dataset_path = ds_cfg.get("path", "")

        eval_fn = load_custom_evaluate_func(
            optimizer_config["evaluation_file"],
            dataset_path or "",
        )

        # NOTE(review): the optimizer receives only the explicit
        # optimizer_config["dataset_path"], not the fallback resolved above --
        # presumably it resolves the dataset itself; confirm.
        opt = MOAROptimizer(
            pipeline=str(yaml_file),
            eval_fn=eval_fn,
            metric_key=optimizer_config["metric_key"],
            models=optimizer_config.get("available_models"),
            agent_model=optimizer_config.get("rewrite_agent_model")
            or optimizer_config.get("model"),
            max_iterations=optimizer_config.get("max_iterations", 20),
            save_dir=optimizer_config.get("save_dir"),
            exploration_weight=optimizer_config.get("exploration_weight", 1.414),
            dataset_path=optimizer_config.get("dataset_path"),
        )
        result = opt.optimize()

        typer.echo("\n✅ MOAR optimization completed successfully!")
        typer.echo(f"   Frontier: {len(result.frontier)} pipelines")
        best = result.best()
        if best:
            typer.echo(
                f"   Best accuracy: {best.accuracy:.4f} (cost: ${best.cost:.4f})"
            )
        cheapest = result.cheapest()
        # Skip the "cheapest" line when it is the very same pipeline as "best".
        if cheapest and cheapest is not best:
            typer.echo(
                f"   Cheapest: ${cheapest.cost:.4f} (accuracy: {cheapest.accuracy:.4f})"
            )
        if result.save_dir:
            typer.echo(f"\n   Optimized pipelines saved to: {result.save_dir}/")
            for p in result.frontier:
                tag = ""
                if best and p is best:
                    tag = " (best accuracy)"
                elif cheapest and p is cheapest:
                    tag = " (cheapest)"
                typer.echo(f"     - {Path(p.yaml_path).name}{tag}")
    except Exception as e:
        # Top-level CLI boundary: report the failure on stderr and exit
        # non-zero, keeping the original exception attached as the cause.
        typer.echo(f"Error running MOAR optimization: {e}", err=True)
        raise typer.Exit(1) from e

docetl.cli.clear_cache()

Clear the LLM cache stored on disk.

Source code in docetl/cli.py
195
196
197
198
199
200
@app.command()
def clear_cache():
    """Clear the LLM cache stored on disk."""
    # Thin CLI wrapper: the actual work is done by `cc`, which is defined
    # outside this block.
    cc()