Skip to content

CLI Interface

docetl.cli.run(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'))

Run the configuration specified in the YAML file.

Parameters:

Name Type Description Default
yaml_file Path

Path to the YAML file containing the pipeline configuration.

Argument(..., help='Path to the YAML file containing the pipeline configuration')
max_threads int | None

Maximum number of threads to use for running operations.

Option(None, help='Maximum number of threads to use for running operations')
Source code in docetl/cli.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
@app.command()
def run(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    max_threads: int | None = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
):
    """
    Run the configuration specified in the YAML file.

    Args:
        yaml_file (Path): Path to the YAML file containing the pipeline configuration.
        max_threads (int | None): Maximum number of threads to use for running operations.
    """
    # The .env file is looked up in the directory the command was invoked
    # from (the process working directory), not next to the YAML file, and
    # it is loaded before the runner is constructed.
    dotenv_path = os.path.join(os.getcwd(), ".env")
    if os.path.exists(dotenv_path):
        load_dotenv(dotenv_path)

    # Build the runner from the YAML file and execute the pipeline end to end.
    DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads).load_run_save()

docetl.cli.build(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), optimizer=typer.Option('moar', '--optimizer', '-o', help="Optimizer to use: 'moar' (default) or 'v1' (deprecated)"), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'), resume=typer.Option(False, help='Resume optimization from a previous build that may have failed'), save_path=typer.Option(None, help='Path to save the optimized pipeline configuration'))

Build and optimize the configuration specified in the YAML file. Any arguments passed here will override the values in the YAML file.

Parameters:

Name Type Description Default
yaml_file Path

Path to the YAML file containing the pipeline configuration.

Argument(..., help='Path to the YAML file containing the pipeline configuration')
optimizer str

Optimizer to use - 'moar' (default) or 'v1' (deprecated).

Option('moar', '--optimizer', '-o', help="Optimizer to use: 'moar' (default) or 'v1' (deprecated)")
max_threads int | None

Maximum number of threads to use for running operations.

Option(None, help='Maximum number of threads to use for running operations')
resume bool

Whether to resume optimization from a previous run. Defaults to False.

Option(False, help='Resume optimization from a previous build that may have failed')
save_path Path

Path to save the optimized pipeline configuration.

Option(None, help='Path to save the optimized pipeline configuration')
Source code in docetl/cli.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
@app.command()
def build(
    yaml_file: Path = typer.Argument(
        ..., help="Path to the YAML file containing the pipeline configuration"
    ),
    optimizer: str = typer.Option(
        "moar",
        "--optimizer",
        "-o",
        help="Optimizer to use: 'moar' (default) or 'v1' (deprecated)",
    ),
    max_threads: int | None = typer.Option(
        None, help="Maximum number of threads to use for running operations"
    ),
    resume: bool = typer.Option(
        False, help="Resume optimization from a previous build that may have failed"
    ),
    save_path: Path = typer.Option(
        None, help="Path to save the optimized pipeline configuration"
    ),
):
    """
    Build and optimize the configuration specified in the YAML file.
    Any arguments passed here will override the values in the YAML file.

    Args:
        yaml_file (Path): Path to the YAML file containing the pipeline configuration.
        optimizer (str): Optimizer to use - 'moar' (default) or 'v1' (deprecated).
        max_threads (int | None): Maximum number of threads to use for running operations.
        resume (bool): Whether to resume optimization from a previous run. Defaults to False.
        save_path (Path): Path to save the optimized pipeline configuration.
    """
    # Exits with code 1 (typer.Exit) when: the optimizer name is unknown, the
    # MOAR optimizer_config section is missing/invalid/incomplete, or the MOAR
    # optimization itself raises.

    # Load .env from the directory the command was invoked from (the process
    # working directory), not from the directory containing the YAML file.
    env_file = os.path.join(os.getcwd(), ".env")
    if os.path.exists(env_file):
        load_dotenv(env_file)

    # Validate optimizer choice
    if optimizer not in ("moar", "v1"):
        typer.echo(
            f"Error: optimizer must be 'moar' or 'v1', got '{optimizer}'", err=True
        )
        raise typer.Exit(1)

    # Load YAML to check for optimizer_config
    import yaml as yaml_lib

    with open(yaml_file, "r") as f:
        loaded = yaml_lib.safe_load(f)

    # safe_load returns None for an empty document and may return a list or
    # scalar for a non-mapping document; normalize to a dict so the checks
    # below report a readable error instead of raising AttributeError.
    config = loaded if isinstance(loaded, dict) else {}

    if optimizer == "moar":
        # Shared by both error panels below.
        example_yaml = """optimizer_config:
  type: moar
  save_dir: ./moar_results
  available_models:
    - gpt-5
    - gpt-4o
  evaluation_file: workloads/medical/evaluate_medications.py
  metric_key: medication_extraction_score
  max_iterations: 40
  model: gpt-5"""

        optimizer_config = config.get("optimizer_config", {})
        # A missing, empty, null, or non-mapping section is reported the same
        # way: the section is required and must be a mapping.
        if not optimizer_config or not isinstance(optimizer_config, dict):
            error_panel = Panel(
                f"[bold red]Error:[/bold red] optimizer_config section is required in YAML for MOAR optimizer.\n\n"
                f"[bold]Example:[/bold]\n"
                f"[dim]{example_yaml}[/dim]\n\n"
                f"[yellow]Note:[/yellow] dataset_name is inferred from the 'datasets' section. "
                f"dataset_path can optionally be specified in optimizer_config, otherwise it's inferred from the 'datasets' section.",
                title="[bold red]Missing optimizer_config[/bold red]",
                border_style="red",
            )
            console.print(error_panel)
            raise typer.Exit(1)

        if optimizer_config.get("type") != "moar":
            error_panel = Panel(
                f"[bold red]Error:[/bold red] optimizer_config.type must be 'moar', got '[yellow]{optimizer_config.get('type')}[/yellow]'",
                title="[bold red]Invalid optimizer type[/bold red]",
                border_style="red",
            )
            console.print(error_panel)
            raise typer.Exit(1)

        # Validate required fields in optimizer_config
        required_fields = {
            "save_dir": "Output directory for MOAR results",
            "available_models": "List of model names to use",
            "evaluation_file": "Path to evaluation function file",
            "metric_key": "Key to extract from evaluation results",
            "max_iterations": "Number of MOARSearch iterations",
            "model": "LLM model name for directive instantiation",
        }

        # A field counts as missing when absent OR falsy (empty string/list, 0).
        missing_fields = [
            field for field in required_fields if not optimizer_config.get(field)
        ]
        if missing_fields:
            # Create a table for required fields, highlighting the missing ones
            fields_table = Table(
                show_header=True, header_style="bold cyan", box=None, padding=(0, 2)
            )
            fields_table.add_column("Field", style="yellow")
            fields_table.add_column("Description", style="dim")

            for field, desc in required_fields.items():
                style = "bold red" if field in missing_fields else "dim"
                fields_table.add_row(f"[{style}]{field}[/{style}]", desc)

            missing_list = ", ".join(
                f"[bold red]{f}[/bold red]" for f in missing_fields
            )

            # Build error content with table rendered separately
            from rich.console import Group

            error_group = Group(
                f"[bold red]Missing required fields:[/bold red] {missing_list}\n",
                "[bold]Required fields:[/bold]",
                fields_table,
                f"\n[bold]Example:[/bold]\n[dim]{example_yaml}[/dim]\n",
                "[yellow]Note:[/yellow] dataset_name is inferred from the 'datasets' section. "
                "dataset_path can optionally be specified in optimizer_config, otherwise it's inferred from the 'datasets' section.",
            )

            error_panel = Panel(
                error_group,
                title="[bold red]Missing Required Fields[/bold red]",
                border_style="red",
            )
            console.print(error_panel)
            raise typer.Exit(1)

        # Run MOAR optimization
        from docetl.moar.cli_helpers import run_moar_optimization

        try:
            results = run_moar_optimization(
                yaml_path=str(yaml_file),
                optimizer_config=optimizer_config,
            )
            typer.echo("\n✅ MOAR optimization completed successfully!")
            typer.echo(f"   Results saved to: {optimizer_config.get('save_dir')}")
            if results.get("evaluation_file"):
                typer.echo(f"   Evaluation: {results['evaluation_file']}")
        except Exception as e:
            # Top-level CLI boundary: report the failure and exit non-zero,
            # keeping the original exception attached as the cause.
            typer.echo(f"Error running MOAR optimization: {e}", err=True)
            raise typer.Exit(1) from e

    else:  # v1 optimizer (deprecated)
        console.print(
            Panel(
                "[bold yellow]Warning:[/bold yellow] The V1 optimizer is deprecated. "
                "Please use MOAR optimizer instead: [bold]docetl build pipeline.yaml --optimizer moar[/bold]",
                title="[bold yellow]Deprecated Optimizer[/bold yellow]",
                border_style="yellow",
            )
        )
        # max_threads, resume and save_path are only consumed on this path;
        # the MOAR branch above passes just the YAML path and optimizer_config.
        runner = DSLRunner.from_yaml(str(yaml_file), max_threads=max_threads)
        runner.optimize(
            save=True,
            return_pipeline=False,
            resume=resume,
            save_path=save_path,
        )

docetl.cli.clear_cache()

Clear the LLM cache stored on disk.

Source code in docetl/cli.py
229
230
231
232
233
234
@app.command()
def clear_cache() -> None:
    """
    Clear the LLM cache stored on disk.
    """
    # `cc` is not defined in this block; presumably it is the cache-clearing
    # helper imported at the top of docetl/cli.py (TODO confirm). This command
    # takes no options and simply delegates to it.
    cc()