diff --git a/cli/decompose/decompose.py b/cli/decompose/decompose.py index bcf1cca9d..60ced3c9c 100644 --- a/cli/decompose/decompose.py +++ b/cli/decompose/decompose.py @@ -1,6 +1,7 @@ import json import keyword import re +import shutil from enum import StrEnum from graphlib import TopologicalSorter from pathlib import Path @@ -16,7 +17,8 @@ class DecompVersion(StrEnum): latest = "latest" v1 = "v1" - # v2 = "v2" + v2 = "v2" + # v3 = "v3" this_file_dir = Path(__file__).resolve().parent @@ -307,27 +309,32 @@ def run( backend_api_key=backend_api_key, ) - # Verify that all user variables are properly defined before use - # This may reorder subtasks if dependencies are out of order - decomp_data = verify_user_variables(decomp_data, input_var) + decomp_dir = out_dir / out_name + val_fn_dir = decomp_dir / "validations" + val_fn_dir.mkdir(parents=True) - with open(out_dir / f"{out_name}.json", "w") as f: + (val_fn_dir / "__init__.py").touch() + + for constraint in decomp_data["identified_constraints"]: + if constraint["val_fn"] is not None: + with open(val_fn_dir / f"{constraint['val_fn_name']}.py", "w") as f: + f.write(constraint["val_fn"] + "\n") + + with open(decomp_dir / f"{out_name}.json", "w") as f: json.dump(decomp_data, f, indent=2) - with open(out_dir / f"{out_name}.py", "w") as f: + with open(decomp_dir / f"{out_name}.py", "w") as f: f.write( m_template.render( - subtasks=decomp_data["subtasks"], user_inputs=input_var + subtasks=decomp_data["subtasks"], + user_inputs=input_var, + identified_constraints=decomp_data["identified_constraints"], ) + "\n" ) except Exception: - created_json = Path(out_dir / f"{out_name}.json") - created_py = Path(out_dir / f"{out_name}.py") - - if created_json.exists() and created_json.is_file(): - created_json.unlink() - if created_py.exists() and created_py.is_file(): - created_py.unlink() + decomp_dir = out_dir / out_name + if decomp_dir.exists() and decomp_dir.is_dir(): + shutil.rmtree(decomp_dir) raise Exception diff --git 
a/cli/decompose/m_decomp_result_v1.py.jinja2 b/cli/decompose/m_decomp_result_v1.py.jinja2 index 7aa1d54f4..1f1e3646e 100644 --- a/cli/decompose/m_decomp_result_v1.py.jinja2 +++ b/cli/decompose/m_decomp_result_v1.py.jinja2 @@ -4,6 +4,14 @@ import os import textwrap import mellea +{%- if "code" in identified_constraints | map(attribute="val_strategy") %} +from mellea.stdlib.requirement import req +{% for c in identified_constraints %} +{%- if c.val_fn %} +from validations.{{ c.val_fn_name }} import validate_input as {{ c.val_fn_name }} +{%- endif %} +{%- endfor %} +{%- endif %} m = mellea.start_session() {%- if user_inputs %} @@ -30,7 +38,14 @@ except KeyError as e: {%- if item.constraints %} requirements=[ {%- for c in item.constraints %} + {%- if c.val_fn %} + req( + {{ c.constraint | tojson}}, + validation_fn={{ c.val_fn_name }}, + ), + {%- else %} {{ c.constraint | tojson}}, + {%- endif %} {%- endfor %} ], {%- else %} diff --git a/cli/decompose/m_decomp_result_v2.py.jinja2 b/cli/decompose/m_decomp_result_v2.py.jinja2 new file mode 100644 index 000000000..9b1bb13c6 --- /dev/null +++ b/cli/decompose/m_decomp_result_v2.py.jinja2 @@ -0,0 +1,91 @@ +{% if user_inputs -%} +import os +{% endif -%} +import textwrap + +import mellea +{%- if "code" in identified_constraints | map(attribute="val_strategy") %} +from mellea.stdlib.requirement import req +{% for c in identified_constraints %} +{%- if c.val_fn %} +from validations.{{ c.val_fn_name }} import validate_input as {{ c.val_fn_name }} +{%- endif %} +{%- endfor %} +{%- endif %} + +m = mellea.start_session() +{%- if user_inputs %} + + +# User Input Variables +try: + {%- for var in user_inputs %} + {{ var | lower }} = os.environ["{{ var | upper }}"] + {%- endfor %} +except KeyError as e: + print(f"ERROR: One or more required environment variables are not set; {e}") + exit(1) +{%- endif %} +{%- for item in subtasks %} + + +{{ item.tag | lower }}_gnrl = textwrap.dedent( + R""" + {{ item.general_instructions | trim | 
indent(width=4, first=False) }} + """.strip() +) +{{ item.tag | lower }} = m.instruct( + {%- if not item.input_vars_required %} + {{ item.subtask[3:] | trim | tojson }}, + {%- else %} + textwrap.dedent( + R""" + {{ item.subtask[3:] | trim }} + + Here are the input variables and their content: + {%- for var in item.input_vars_required %} + + - {{ var | upper }} = {{ "{{" }}{{ var | upper }}{{ "}}" }} + {%- endfor %} + """.strip() + ), + {%- endif %} + {%- if item.constraints %} + requirements=[ + {%- for c in item.constraints %} + {%- if c.val_fn %} + req( + {{ c.constraint | tojson}}, + validation_fn={{ c.val_fn_name }}, + ), + {%- else %} + {{ c.constraint | tojson}}, + {%- endif %} + {%- endfor %} + ], + {%- else %} + requirements=None, + {%- endif %} + {%- if item.input_vars_required %} + user_variables={ + {%- for var in item.input_vars_required %} + {{ var | upper | tojson }}: {{ var | lower }}, + {%- endfor %} + }, + {%- endif %} + grounding_context={ + "GENERAL_INSTRUCTIONS": {{ item.tag | lower }}_gnrl, + {%- for var in item.depends_on %} + {{ var | upper | tojson }}: {{ var | lower }}.value, + {%- endfor %} + }, +) +assert {{ item.tag | lower }}.value is not None, 'ERROR: task "{{ item.tag | lower }}" execution failed' +{%- if loop.last %} + + +final_answer = {{ item.tag | lower }}.value + +print(final_answer) +{%- endif -%} +{%- endfor -%} diff --git a/cli/decompose/pipeline.py b/cli/decompose/pipeline.py index ae70be68a..5d5191734 100644 --- a/cli/decompose/pipeline.py +++ b/cli/decompose/pipeline.py @@ -9,10 +9,11 @@ from .prompt_modules import ( constraint_extractor, - # general_instructions, + general_instructions, subtask_constraint_assign, subtask_list, subtask_prompt_generator, + validation_code_generator, validation_decision, ) from .prompt_modules.subtask_constraint_assign import SubtaskPromptConstraintsItem @@ -20,9 +21,16 @@ from .prompt_modules.subtask_prompt_generator import SubtaskPromptItem +class ConstraintValData(TypedDict): + 
val_strategy: Literal["code", "llm"] + val_fn: str | None + + class ConstraintResult(TypedDict): constraint: str - validation_strategy: str + val_strategy: Literal["code", "llm"] + val_fn: str | None + val_fn_name: str class DecompSubtasksResult(TypedDict): @@ -30,7 +38,7 @@ class DecompSubtasksResult(TypedDict): tag: str constraints: list[ConstraintResult] prompt_template: str - # general_instructions: str + general_instructions: str input_vars_required: list[str] depends_on: list[str] generated_response: NotRequired[str] @@ -70,7 +78,9 @@ def decompose( case DecompBackend.ollama: m_session = MelleaSession( OllamaModelBackend( - model_id=model_id, model_options={ModelOption.CONTEXT_WINDOW: 16384} + model_id=model_id, + base_url=backend_endpoint, + model_options={ModelOption.CONTEXT_WINDOW: 16384}, ) ) case DecompBackend.openai: @@ -113,11 +123,27 @@ def decompose( m_session, task_prompt, enforce_same_words=False ).parse() - constraint_validation_strategies: dict[str, Literal["code", "llm"]] = { - cons_key: validation_decision.generate(m_session, cons_key).parse() + constraint_val_strategy: dict[ + str, dict[Literal["val_strategy"], Literal["code", "llm"]] + ] = { + cons_key: { + "val_strategy": validation_decision.generate(m_session, cons_key).parse() + } for cons_key in task_prompt_constraints } + constraint_val_data: dict[str, ConstraintValData] = {} + + for cons_key in constraint_val_strategy: + constraint_val_data[cons_key] = { + "val_strategy": constraint_val_strategy[cons_key]["val_strategy"], + "val_fn": None, + } + if constraint_val_data[cons_key]["val_strategy"] == "code": + constraint_val_data[cons_key]["val_fn"] = ( + validation_code_generator.generate(m_session, cons_key).parse() + ) + subtask_prompts: list[SubtaskPromptItem] = subtask_prompt_generator.generate( m_session, task_prompt, @@ -140,14 +166,21 @@ def decompose( constraints=[ { "constraint": cons_str, - "validation_strategy": constraint_validation_strategies[cons_str], + "val_strategy": 
constraint_val_data[cons_str]["val_strategy"], + "val_fn_name": f"val_fn_{task_prompt_constraints.index(cons_str) + 1}", + # >> Always include generated "val_fn" code (experimental) + "val_fn": constraint_val_data[cons_str]["val_fn"], + # >> Include generated "val_fn" code only for the last subtask (experimental) + # "val_fn": constraint_val_data[cons_str]["val_fn"] + # if subtask_i + 1 == len(subtask_prompts_with_constraints) + # else None, } for cons_str in subtask_data.constraints ], prompt_template=subtask_data.prompt_template, - # general_instructions=general_instructions.generate( - # m_session, input_str=subtask_data.prompt_template - # ).parse(), + general_instructions=general_instructions.generate( + m_session, input_str=subtask_data.prompt_template + ).parse(), input_vars_required=list( dict.fromkeys( # Remove duplicates while preserving the original order. [ @@ -171,7 +204,7 @@ def decompose( ) ), ) - for subtask_data in subtask_prompts_with_constraints + for subtask_i, subtask_data in enumerate(subtask_prompts_with_constraints) ] return DecompPipelineResult( @@ -180,9 +213,11 @@ def decompose( identified_constraints=[ { "constraint": cons_str, - "validation_strategy": constraint_validation_strategies[cons_str], + "val_strategy": constraint_val_data[cons_str]["val_strategy"], + "val_fn": constraint_val_data[cons_str]["val_fn"], + "val_fn_name": f"val_fn_{cons_i + 1}", } - for cons_str in task_prompt_constraints + for cons_i, cons_str in enumerate(task_prompt_constraints) ], subtasks=decomp_subtask_result, ) diff --git a/cli/decompose/prompt_modules/__init__.py b/cli/decompose/prompt_modules/__init__.py index 19b7079e3..922bdcbe8 100644 --- a/cli/decompose/prompt_modules/__init__.py +++ b/cli/decompose/prompt_modules/__init__.py @@ -7,4 +7,7 @@ from .subtask_prompt_generator import ( subtask_prompt_generator as subtask_prompt_generator, ) +from .validation_code_generator import ( + validation_code_generator as validation_code_generator, +) from 
.validation_decision import validation_decision as validation_decision diff --git a/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 b/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 index 30baaf93a..e5cad42e7 100644 --- a/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 +++ b/cli/decompose/prompt_modules/subtask_constraint_assign/_prompt/system_template.jinja2 @@ -8,7 +8,7 @@ You will be provided with the following 4 parameters inside their respective tag 4. : A list of candidate (possible) constraints that can be assigned to the target task. -The list contain the constraints of all tasks on the , your job is to filter and select only the constraints belonging to your target task. +The is a list of constraints identified for the entire , your job is to filter and select only the constraints belonging to your target task. It is possible that none of the constraints in the are relevant or related to your target task. 
Below, enclosed in tags, are instructions to guide you on how to complete your assignment: diff --git a/cli/decompose/prompt_modules/validation_code_generator/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/__init__.py new file mode 100644 index 000000000..dfb4bd0ce --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/__init__.py @@ -0,0 +1,7 @@ +from ._exceptions import ( + BackendGenerationError as BackendGenerationError, + TagExtractionError as TagExtractionError, +) +from ._validation_code_generator import ( + validation_code_generator as validation_code_generator, +) diff --git a/cli/decompose/prompt_modules/validation_code_generator/_exceptions.py b/cli/decompose/prompt_modules/validation_code_generator/_exceptions.py new file mode 100644 index 000000000..d808b613d --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_exceptions.py @@ -0,0 +1,24 @@ +from typing import Any + + +class ValidationCodeGeneratorError(Exception): + def __init__(self, error_message: str, **kwargs: dict[str, Any]): + self.error_message = error_message + self.__dict__.update(kwargs) + super().__init__( + f'Module Error "validation_code_generator"; {self.error_message}' + ) + + +class BackendGenerationError(ValidationCodeGeneratorError): + """Raised when LLM generation fails in the "validation_code_generator" prompt module.""" + + def __init__(self, error_message: str, **kwargs: dict[str, Any]): + super().__init__(error_message, **kwargs) + + +class TagExtractionError(ValidationCodeGeneratorError): + """Raised when tag extraction fails in the "validation_code_generator" prompt module.""" + + def __init__(self, error_message: str, **kwargs: dict[str, Any]): + super().__init__(error_message, **kwargs) diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py new file mode 100644 index 000000000..0b985cbe6 --- /dev/null 
+++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/__init__.py @@ -0,0 +1,5 @@ +from ._icl_examples import icl_examples as default_icl_examples +from ._prompt import ( + get_system_prompt as get_system_prompt, + get_user_prompt as get_user_prompt, +) diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py new file mode 100644 index 000000000..052fe7c99 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/__init__.py @@ -0,0 +1,2 @@ +from ._icl_examples import icl_examples as icl_examples +from ._types import ICLExample as ICLExample diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py new file mode 100644 index 000000000..9bb4e23da --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_1/_example.py @@ -0,0 +1,24 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = """You must not use any uppercase letters""" + +validation_function = """def validate_input(input: str) -> bool: + \""" + Validates that the input contains only lowercase letters. 
+ + Args: + input (str): The input to validate + + Returns: + bool: True if all characters are lowercase, False otherwise + \""" + try: + return input.islower() + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py new file mode 100644 index 000000000..6e2d98fe0 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_2/_example.py @@ -0,0 +1,31 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = """The answer must be a JSON with the following keys: +1. "subject" +2. "content\"""" + +validation_function = """import json + +def validate_input(input: str) -> bool: + \""" + Validates that the input is a JSON with required keys: subject and content. 
+ + Args: + input (str): The input to validate + + Returns: + bool: True if JSON has required keys, False otherwise + \""" + try: + data = json.loads(input) + return isinstance(data, dict) and "subject" in data and "content" in data + except json.JSONDecodeError: + return False + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py new file mode 100644 index 000000000..65070a6ed --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_3/_example.py @@ -0,0 +1,58 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement = "Return a list of requirements, using dash bullets (-), where each item begins with the relevant entity" + +validation_function = """def validate_input(input: str) -> bool: + \""" + Validates that the input is a list of requirements using dash bullets, + where each item begins with the relevant entity. 
+ + Args: + input (str): The input to validate + + Returns: + bool: True if input follows the required format, False otherwise + \""" + try: + if not input or not isinstance(input, str): + return False + + lines = input.strip().split('\n') + + # Check if all lines are empty + if not any(line.strip() for line in lines): + return False + + for line in lines: + line = line.strip() + + # Skip empty lines + if not line: + continue + + # Check if line starts with a dash bullet + if not line.startswith('- '): + return False + + # Check if there's content after the dash bullet + content = line[2:].strip() # Remove '- ' prefix + if not content: + return False + + # Check if content has an entity (word) at the beginning + words = content.split() + if not words: + return False + + # Entity should be the first word - just check it exists + # We're not validating what constitutes a valid entity here + + return True + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py new file mode 100644 index 000000000..1f9f32ea5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/__init__.py @@ -0,0 +1 @@ +from ._example import example as example diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py new file mode 100644 index 000000000..f1af01ab1 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_example_4/_example.py @@ -0,0 +1,31 @@ +# ruff: noqa: W293 +from .._types import ICLExample + +constraint_requirement 
= 'Avoid the words "daughter-in-law" and "grandson"' + +validation_function = """def validate_input(input: str) -> bool: + \""" + Validates that the input does not contain the words "daughter-in-law" and "grandson". + + Args: + input (str): The input to validate + + Returns: + bool: True if neither word is found, False otherwise + \""" + try: + if not input: + return False + + # Convert to lowercase for case-insensitive comparison + input_lower = input.lower() + + # Check if either forbidden word is present + return "daughter-in-law" not in input_lower and "grandson" not in input_lower + except Exception: + return False""" + +example: ICLExample = { + "constraint_requirement": constraint_requirement, + "validation_function": validation_function, +} diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py new file mode 100644 index 000000000..c018d2e41 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_icl_examples.py @@ -0,0 +1,5 @@ +from ._example_1 import example as example_1 +from ._example_2 import example as example_2 +from ._types import ICLExample + +icl_examples: list[ICLExample] = [example_1, example_2] diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py new file mode 100644 index 000000000..bdd1f2372 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_icl_examples/_types.py @@ -0,0 +1,6 @@ +from typing import TypedDict + + +class ICLExample(TypedDict): + constraint_requirement: str + validation_function: str diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py new file mode 100644 index 
000000000..b324180fc --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/_prompt.py @@ -0,0 +1,19 @@ +from pathlib import Path + +from jinja2 import Environment, FileSystemLoader + +from ._icl_examples import ICLExample, icl_examples as default_icl_examples + +this_file_dir = Path(__file__).resolve().parent + +environment = Environment(loader=FileSystemLoader(this_file_dir), autoescape=False) +system_template = environment.get_template("system_template.jinja2") +user_template = environment.get_template("user_template.jinja2") + + +def get_system_prompt(icl_examples: list[ICLExample] = default_icl_examples) -> str: + return system_template.render(icl_examples=icl_examples).strip() + + +def get_user_prompt(constraint_requirement: str) -> str: + return user_template.render(constraint_requirement=constraint_requirement).strip() diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 b/cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 new file mode 100644 index 000000000..7b414d0d5 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/system_template.jinja2 @@ -0,0 +1,77 @@ +You are a Python developer specialized in writing validation functions based on natural language constraints or requirements. + +## Function Requirements + +You will be provided with a constraint/requirement inside the tags. +Your task is to write a Python function capable of validating the against a text input to your function. + +Your code must: +- Be a single Python function. +- Take exactly one string parameter (the text input to be validated). +- Return a boolean value (True if valid or False if invalid). +- Use only standard Python libraries. No third-party dependencies. +- Be deterministic and self-contained. +- If the constraint/requirement mentions data that was not provided, just return `False` (don't need to implement code). 
+ +## Output Format + +Your response must be structured as follows: +- Your Python function must be inside the tags. +- The function signature must be: `def validate_input(input: str) -> bool:`. +- Always enclose your code on a "try..except Exception:" clause and return `False` in case of exceptions. + +## Examples + +Here are some complete examples showing constraints/requirements and their corresponding validation functions: + +{% for item in icl_examples -%} + + +{{ item["constraint_requirement"] }} + + +{{ item["validation_function"] }} + + +All tags are closed and my assignment is finished. + + +{% endfor -%} +That concludes the complete examples of your assignment. + +## Additional Instructions + +When writing your answer, follow these additional instructions below to be successful: +1. The function signature must be: `def validate_input(input: str) -> bool:` +2. The function must handle `None` and empty string inputs by returning `False` +3. Use appropriate Python standard library modules (re, json, etc.) as needed +4. Ensure the function is simple and doesn't have unnecessary complexity +5. The validation logic should directly correspond to the provided constraint/requirement + +## Common Validation Patterns + +Here are some typical validation scenarios you might encounter: + +1. Character limit validation: + - Check if the answer has a specific number of characters or words + - Example: "The answer must be less than 100 characters" + +2. Format validation: + - Validate JSON structure, XML format, or other structured data + - Example: "The answer must be valid JSON with 'name' and 'age' fields" + +3. Content validation: + - Check for specific content patterns like uppercase letters, numbers, etc. + - Example: "The answer must contain at least one uppercase letter" + +4. 
Pattern matching: + - Use regex to validate specific patterns + - Example: "The answer must be in the format 'Name: [text], Age: [number]'" + +Important: Use only standard Python libraries that don't require additional installation. +Important: Your function must be deterministic and produce consistent results. +Important: You must always close the tags that were opened by using their corresponding close tag. You will be penalized if you don't close all tags. + +Very Important: After closing all tags, finish your assignment by writing (without the double quotes): "All tags are closed and my assignment is finished.". +Very Important: Always enclose your code on a "try..except Exception:" clause and return `False` in case of exceptions. +Very Important: If the constraint/requirement is not clear, or missing information, just return `False`. diff --git a/cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 b/cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 new file mode 100644 index 000000000..867af52e4 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_prompt/user_template.jinja2 @@ -0,0 +1,9 @@ +## Requirements: +- The function name must be: "validate_input" +- The function signature must be: `def validate_input(input: str) -> bool:` + +Now, here is the constraint/requirement for you to write a Python validation function: + + +{{ constraint_requirement }} + diff --git a/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py b/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py new file mode 100644 index 000000000..55949c963 --- /dev/null +++ b/cli/decompose/prompt_modules/validation_code_generator/_validation_code_generator.py @@ -0,0 +1,113 @@ +import re +from collections.abc import Callable +from typing import Any, TypeVar, final + +from mellea import MelleaSession +from mellea.backends import ModelOption +from 
mellea.stdlib.components.chat import Message + +from .._prompt_modules import PromptModule, PromptModuleString +from ._exceptions import BackendGenerationError, TagExtractionError +from ._prompt import get_system_prompt, get_user_prompt + +T = TypeVar("T") + +RE_VALIDATION_FUNCTION = re.compile( + r"(.+?)", flags=re.IGNORECASE | re.DOTALL +) + + +@final +class _ValidationCodeGenerator(PromptModule): + @staticmethod + def _default_parser(generated_str: str) -> str: + r"""Default parser of the `validation_code_generator` module. + + _**Disclaimer**: This is a LLM-prompting module, so the results will vary depending + on the size and capabilities of the LLM used. The results are also not guaranteed, so + take a look at this module's Exceptions and plan for unreliable results._ + + Args: + generated_str (`str`): The LLM's answer to be parsed. + + Returns: + str: The extracted Python validation function code. + + Raises: + TagExtractionError: An error occurred trying to extract content from the + generated output. The LLM probably failed to open and close + the \ tags. + """ + validation_function_match = re.search(RE_VALIDATION_FUNCTION, generated_str) + + validation_function_str: str | None = ( + validation_function_match.group(1).strip() + if validation_function_match + else None + ) + + if validation_function_str is None: + raise TagExtractionError( + 'LLM failed to generate correct tags for extraction: ""' + ) + + return validation_function_str + + def generate( + self, + mellea_session: MelleaSession, + input_str: str | None, + max_new_tokens: int = 4096, + parser: Callable[[str], T] = _default_parser, # type: ignore[assignment] + # About the mypy ignore above: https://github.com/python/mypy/issues/3737 + **kwargs: dict[str, Any], + ) -> PromptModuleString[T]: + """Generates a Python validation function based on a provided constraint/requirement. + + Args: + mellea_session (`MelleaSession`): A mellea session with a backend. 
+ input_str (`str`): Natural language constraint/requirement to generate validation code for. + prompt (`str`, optional): The original task prompt for context. Defaults to None. + max_new_tokens (`int`, optional): Maximum tokens to generate. + Defaults to `4096`. + parser (`Callable[[str], Any]`, optional): A string parsing function. + Defaults to `_ValidationCodeGenerator._default_parser`. + + Returns: + PromptModuleString: A `PromptModuleString` class containing the generated output. + + The `PromptModuleString` class behaves like a `str`, but with an additional `parse()` method + to execute the parsing function passed in the `parser` argument of + this method (the `parser` argument defaults to `_ValidationCodeGenerator._default_parser`). + + Raises: + BackendGenerationError: Some error occurred during the LLM generation call. + """ + assert input_str is not None, 'This module requires the "input_str" argument' + + system_prompt = get_system_prompt() + user_prompt = get_user_prompt(constraint_requirement=input_str) + + action = Message("user", user_prompt) + + try: + gen_result = mellea_session.act( + action=action, + model_options={ + ModelOption.SYSTEM_PROMPT: system_prompt, + ModelOption.TEMPERATURE: 0, + ModelOption.MAX_NEW_TOKENS: max_new_tokens, + }, + ).value + except Exception as e: + raise BackendGenerationError(f"LLM generation failed: {e}") + + if gen_result is None: + raise BackendGenerationError( + "LLM generation failed: value attribute is None" + ) + + return PromptModuleString(gen_result, parser) + + +validation_code_generator = _ValidationCodeGenerator() diff --git a/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 b/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 index 8e5cb00fb..fc0be317c 100644 --- a/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 +++ b/cli/decompose/prompt_modules/validation_decision/_prompt/system_template.jinja2 @@ -1,14 +1,14 @@ 
-You are a Validation Decision Expert specialized in determining whether prompt requirements can be validated deterministically by writing Python code or if they're best suited for LLM validation. +You are a Validation Decision Expert specialized in determining whether prompt requirements can be validated deterministically, by writing Python code, or if they're best suited for LLM validation. ## Decision Criteria ### Code Validation A requirement should be classified as "code" if it: -- Can be checked with deterministic algorithms +- Can be validated deterministically - Involves structured data validation (e.g., JSON schema, regex patterns) -- Requires mathematical computations or logical operations - Can be validated with simple string operations - Has clearly defined success/failure criteria that can be programmatically determined +- Is a straightforward requirement to validate the task output ### LLM Validation A requirement should be classified as "llm" if it: @@ -59,6 +59,7 @@ When writing your answer, follow these additional instructions below to be succe 3. After closing all tags, finish your assignment by writing (without the double quotes): "All tags are closed and my assignment is finished." Important: You must always close the tags that were opened by using their corresponding close tag. You will be penalized if you don't close all tags. +Important: The "code" classification is usually for validating the task output format or other deterministic requirements. Your response must contain exactly one of these two words inside tags: - code diff --git a/docs/examples/m_decompose/README.md b/docs/examples/m_decompose/README.md new file mode 100644 index 000000000..28fdf94e8 --- /dev/null +++ b/docs/examples/m_decompose/README.md @@ -0,0 +1,104 @@ +# m_decompose + +This module with the example demonstrates **task decomposition pipelines** built with *Mellea generative programs*. 
+ +Instead of solving a complex task with a single prompt, the system first **decomposes the task into subtasks**, then executes them sequentially through an assembled pipeline. + +This pattern improves reasoning quality, interpretability, and modularity in LLM-powered systems. + +--- + +# Overview + +Many complex tasks contain multiple reasoning steps. +The `m_decompose` pipeline handles this by splitting the task into smaller units. + +``` +User Request + ↓ +Task Decomposition + ↓ +Subtasks + ↓ +Task Execution + ↓ +Final Result +``` + +Rather than writing a large prompt, the workflow uses **generative modules and reusable prompts**. + +--- + +# Directory + +``` +m_decompose/ +├── decompose.py +├── pipeline.py +├── prompt_modules +└── README.md +``` + +**decompose.py** + +Generates the refined subtasks from the user request. + +**pipeline.py** + +Runs the full workflow: + +1. decompose the task +2. execute subtasks +3. aggregate results + +**prompt_modules** + +Reusable prompt components used by the pipeline. + +**m_decomp_result_v1.py.jinja2** + +Template used to format the final output. + +--- + +# Quick Start + +Example usage: + +```python +from mellea.cli.decompose.pipeline import decompose, DecompBackend +import json + +query = """Our company is planning a large corporate team-building event later this year, and I need help putting together a complete strategy and execution plan. + +The event will likely involve somewhere between 100 and 300 employees and the goal is to strengthen team cohesion, improve collaboration across departments, boost morale, and create a memorable experience for everyone involved.
I’d like you to help me create a full event strategy document that covers everything from early planning through post-event evaluation.""" + +result = decompose( + task_prompt=query, + model_id="mistralai/Mistral-Small-3.2-24B-Instruct-2506", + backend=DecompBackend.openai, + backend_endpoint="http://localhost:8000/v1", + backend_api_key="EMPTY", +) + +print(json.dumps(result, indent=2, ensure_ascii=False)) +``` + + +The pipeline then executes each step and produces the final answer. + +--- + +# What This Example Shows + +This example highlights three key ideas: + +- **Task Decomposition** — break complex problems into smaller reasoning steps. +- **Generative Pipelines** — structure LLM workflows as a programmatic pipeline instead of a single prompt. +- **Modular Prompts** — separate prompt design from execution logic using reusable modules. + +--- + +# Summary + +`m_decompose` shows how to build **LLM pipelines** using task decomposition. diff --git a/docs/examples/m_decompose/decompose_using_cli.sh b/docs/examples/m_decompose/decompose_using_cli.sh index 169596b56..32612f02f 100644 --- a/docs/examples/m_decompose/decompose_using_cli.sh +++ b/docs/examples/m_decompose/decompose_using_cli.sh @@ -1 +1 @@ -m decompose run --out-dir ./ --prompt-file ./docs/examples/m_decomposeexample_decompose_prompt.txt \ No newline at end of file +m decompose run --out-dir ./ --prompt-file example_decompose_prompt.txt diff --git a/docs/examples/m_decompose/python/python_decompose_example.py b/docs/examples/m_decompose/python/python_decompose_example.py index a1b951cc3..81962fcd3 100644 --- a/docs/examples/m_decompose/python/python_decompose_example.py +++ b/docs/examples/m_decompose/python/python_decompose_example.py @@ -176,7 +176,7 @@ def display_results(result: DecompPipelineResult): print(f"\n🔍 Constraints Identified ({len(result['identified_constraints'])}):") for i, constraint in enumerate(result["identified_constraints"], 1): print(f" {i}.
{constraint['constraint']}") - print(f" Validation: {constraint['validation_strategy']}") + print(f" Validation: {constraint['val_strategy']}") print(f"\n🎯 Detailed Subtasks ({len(result['subtasks'])}):") for i, subtask_detail in enumerate(result["subtasks"], 1): diff --git a/docs/examples/m_decompose/small/m_decomp_result.json b/docs/examples/m_decompose/small/m_decomp_result.json index 50cb497c9..378af18fe 100644 --- a/docs/examples/m_decompose/small/m_decomp_result.json +++ b/docs/examples/m_decompose/small/m_decomp_result.json @@ -11,35 +11,35 @@ "identified_constraints": [ { "constraint": "The shopping list must include decorations, food, drinks, party favors, and a cake that serves at least 20 people", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "Suggest 5-7 age-appropriate party games or activities that fit the unicorn theme and can keep the kids entertained for about 3 hours", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "Write out a timeline for the party day starting from 2 hours before guests arrive until cleanup is done showing exactly when to do food prep, decoration setup, when each activity should happen, and when to serve food and cake", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "Draft a cute invitation text that includes all the important details like date, time, location, RSVP information, and any special instructions about allergies or what kids should bring or wear", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "Create a backup plan in case it rains", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "Everything must stay within a total budget of about ~$400", - "validation_strategy": "code" + "val_strategy": "code" }, { "constraint": "Follow basic food safety guidelines especially for kids with common allergies like nuts and dairy", - "validation_strategy": "llm" + "val_strategy": "llm" }, { 
"constraint": "The party theme colors should be pink, purple, and gold to match the unicorn decorations", - "validation_strategy": "llm" + "val_strategy": "llm" } ], "subtasks": [ @@ -49,19 +49,19 @@ "constraints": [ { "constraint": "The shopping list must include decorations, food, drinks, party favors, and a cake that serves at least 20 people", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "Everything must stay within a total budget of about ~$400", - "validation_strategy": "code" + "val_strategy": "code" }, { "constraint": "Follow basic food safety guidelines especially for kids with common allergies like nuts and dairy", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "The party theme colors should be pink, purple, and gold to match the unicorn decorations", - "validation_strategy": "llm" + "val_strategy": "llm" } ], "prompt_template": "Your task is to create a detailed shopping list with estimated costs for decorations, food, drinks, party favors, and a cake that serves at least 20 people, ensuring the total budget stays within $400. Follow these steps to accomplish your task:\n\n1. **Understand the Requirements**:\n - The party is for an 8-year-old daughter who loves unicorns.\n - The party theme colors are pink, purple, and gold.\n - The party will have 15 kids from her class, but the cake should serve at least 20 people.\n - The total budget should not exceed $400.\n - Follow basic food safety guidelines, especially for kids with common allergies like nuts and dairy.\n\n2. **Categories to Include**:\n - **Decorations**: Unicorn-themed decorations in pink, purple, and gold.\n - **Food**: Finger foods and snacks suitable for kids.\n - **Drinks**: Beverages that are kid-friendly.\n - **Party Favors**: Small gifts or treats for each child.\n - **Cake**: A unicorn-themed cake that serves at least 20 people.\n\n3. 
**Estimate Costs**:\n - Research and list the estimated costs for each item.\n - Ensure the total cost of all items does not exceed $400.\n\n4. **Create the Shopping List**:\n - Organize the list by categories (decorations, food, drinks, party favors, cake).\n - Include the name of each item, the estimated cost, and the quantity needed.\n - Ensure all items fit within the unicorn theme and the specified colors.\n\n5. **Review the List**:\n - Double-check that all necessary items are included.\n - Verify that the total estimated cost is within the $400 budget.\n\n6. **Output the Shopping List**:\n - Present the shopping list in a clear and organized format.\n - Include the estimated cost for each item and the total estimated cost.\n\nHere is an example structure to guide your writing:\n- **Decorations**:\n - Item 1: [Description], Quantity: [Number], Estimated Cost: [$Amount]\n - Item 2: [Description], Quantity: [Number], Estimated Cost: [$Amount]\n- **Food**:\n - Item 1: [Description], Quantity: [Number], Estimated Cost: [$Amount]\n - Item 2: [Description], Quantity: [Number], Estimated Cost: [$Amount]\n- **Drinks**:\n - Item 1: [Description], Quantity: [Number], Estimated Cost: [$Amount]\n - Item 2: [Description], Quantity: [Number], Estimated Cost: [$Amount]\n- **Party Favors**:\n - Item 1: [Description], Quantity: [Number], Estimated Cost: [$Amount]\n - Item 2: [Description], Quantity: [Number], Estimated Cost: [$Amount]\n- **Cake**:\n - Item 1: [Description], Quantity: [Number], Estimated Cost: [$Amount]\n\nEnsure that the shopping list is comprehensive and meets all the specified requirements.", @@ -74,11 +74,11 @@ "constraints": [ { "constraint": "Suggest 5-7 age-appropriate party games or activities that fit the unicorn theme and can keep the kids entertained for about 3 hours", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "The party theme colors should be pink, purple, and gold to match the unicorn decorations", - 
"validation_strategy": "llm" + "val_strategy": "llm" } ], "prompt_template": "Your task is to suggest 5-7 age-appropriate party games or activities that fit the unicorn theme and can keep the kids entertained for about 3 hours. Follow these steps to accomplish your task:\n\n1. **Understand the Theme and Audience**:\n - The party theme is unicorns, and the target audience is 8-year-old kids.\n - The party should be fun, engaging, and age-appropriate.\n - The theme colors are pink, purple, and gold.\n\n2. **Consider the Duration**:\n - The activities should keep the kids entertained for about 3 hours.\n - Plan a mix of active and passive games to maintain engagement.\n\n3. **Brainstorm Activity Ideas**:\n - Think of games and activities that fit the unicorn theme.\n - Ensure the activities are safe, fun, and suitable for 8-year-olds.\n - Consider both indoor and outdoor activities in case of rain.\n\n4. **List the Activities**:\n - Provide a list of 5-7 activities with brief descriptions.\n - Include any necessary materials or preparations for each activity.\n - Ensure the activities are varied to keep the kids interested.\n\n5. **Review the Shopping List**:\n - Refer to the shopping list created in the previous step to ensure you have all the necessary materials for the activities:\n \n {{SHOPPING_LIST}}\n \n\n6. **Finalize the Activities**:\n - Ensure the activities are feasible within the budget and theme.\n - Make sure the activities are safe and consider any common allergies or dietary restrictions.\n\nHere is an example structure to guide your suggestions:\n- **Activity 1**: [Description]\n- **Activity 2**: [Description]\n- **Activity 3**: [Description]\n- **Activity 4**: [Description]\n- **Activity 5**: [Description]\n- **Activity 6**: [Description]\n- **Activity 7**: [Description]\n\nEnsure each activity is clearly described and fits the unicorn theme. 
You should write only the activities, do not include the guidance structure.", @@ -93,11 +93,11 @@ "constraints": [ { "constraint": "Write out a timeline for the party day starting from 2 hours before guests arrive until cleanup is done showing exactly when to do food prep, decoration setup, when each activity should happen, and when to serve food and cake", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "Follow basic food safety guidelines especially for kids with common allergies like nuts and dairy", - "validation_strategy": "llm" + "val_strategy": "llm" } ], "prompt_template": "Your task is to draft a detailed timeline for the birthday party day, starting from 2 hours before guests arrive until cleanup is done. The timeline should include food prep, decoration setup, and scheduling of activities, ensuring the party runs smoothly and stays within the allocated time.\n\nTo accomplish this, follow these steps:\n\n1. **Review the Shopping List and Party Games**:\n First, review the shopping list and party games that have been prepared in the previous steps. This will help you understand the tasks that need to be scheduled and the activities that will take place during the party.\n \n {{SHOPPING_LIST}}\n \n \n {{PARTY_GAMES}}\n \n\n2. **Plan the Timeline**:\n Create a timeline that starts 2 hours before the guests arrive and ends with the cleanup after the party. 
Include the following key elements:\n - **Decoration Setup**: Schedule the setup of decorations, ensuring they are in place before guests arrive.\n - **Food Prep**: Plan the preparation of food and drinks, including any last-minute tasks that need to be done just before serving.\n - **Activity Schedule**: Allocate time slots for each of the 5-7 party games or activities, ensuring they are spaced out appropriately to keep the kids entertained for about 3 hours.\n - **Food and Cake Serving**: Schedule the serving of food and cake, making sure it fits well within the activity timeline.\n - **Cleanup**: Include a time slot for cleanup after the party, ensuring all tasks are completed efficiently.\n\n3. **Ensure Logical Flow**:\n Make sure the timeline flows logically, with each task leading smoothly into the next. Consider the time required for each activity and the transitions between them.\n\n4. **Include Buffer Time**:\n Add buffer time between activities to account for any unexpected delays or transitions.\n\n5. **Finalize the Timeline**:\n Compile all the scheduled tasks into a clear and concise timeline. 
Ensure it is easy to follow and includes all necessary details.\n\nHere is an example structure to guide your writing:\n- **2 Hours Before Guests Arrive**: Start decoration setup and initial food prep.\n- **1.5 Hours Before Guests Arrive**: Complete decoration setup and continue food prep.\n- **1 Hour Before Guests Arrive**: Final touches on decorations and food prep.\n- **Guests Arrive**: Welcome guests and begin the first activity.\n- **Activity 1**: [Describe the activity and its duration]\n- **Activity 2**: [Describe the activity and its duration]\n- **Serve Food**: [Schedule the serving of food]\n- **Activity 3**: [Describe the activity and its duration]\n- **Serve Cake**: [Schedule the serving of cake]\n- **Activity 4**: [Describe the activity and its duration]\n- **Activity 5**: [Describe the activity and its duration]\n- **Cleanup**: [Schedule the cleanup tasks]\n\nEnsure that each time slot is clearly defined and that the timeline is easy to follow. You should write only the timeline, do not include the guidance structure.", @@ -113,11 +113,11 @@ "constraints": [ { "constraint": "Draft a cute invitation text that includes all the important details like date, time, location, RSVP information, and any special instructions about allergies or what kids should bring or wear", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "The party theme colors should be pink, purple, and gold to match the unicorn decorations", - "validation_strategy": "llm" + "val_strategy": "llm" } ], "prompt_template": "Your task is to create a cute invitation text for an 8-year-old's unicorn-themed birthday party. The invitation should include all the important details and follow the party's theme colors: pink, purple, and gold.\n\nTo accomplish this, follow these steps:\n\n1. 
**Gather Party Details**:\n - Review the shopping list, party games, and timeline created in the previous steps to gather all necessary information:\n \n {{SHOPPING_LIST}}\n \n \n {{PARTY_GAMES}}\n \n \n {{PARTY_TIMELINE}}\n \n\n2. **Include Essential Information**:\n - **Date and Time**: Clearly state the date and time of the party.\n - **Location**: Provide the address and any specific instructions for getting to the party location.\n - **RSVP Information**: Include a contact email or phone number for RSVP and any deadline for responding.\n - **Special Instructions**: Mention any allergies, what kids should bring or wear, and any other important notes.\n\n3. **Theme and Tone**:\n - Use a cute and playful tone that fits the unicorn theme.\n - Incorporate the theme colors (pink, purple, and gold) into the design and wording of the invitation.\n\n4. **Format the Invitation**:\n - Use a clear and organized format, such as bullet points or paragraphs, to present the information.\n - Make sure the invitation is easy to read and visually appealing.\n\n5. **Example Structure**:\n - **Header**: A fun and colorful header that says \"Unicorn Birthday Party!\"\n - **Body**:\n - \"You're invited to [Child's Name]'s Unicorn Birthday Party!\"\n - \"Date: [Date]\"\n - \"Time: [Time]\"\n - \"Location: [Location]\"\n - \"RSVP by [Date] to [Email/Phone Number]\"\n - \"Special Instructions: [Allergies, what to bring or wear, etc.]\"\n - **Footer**: A cute closing line like \"Hope to see you there for a magical time!\"\n\n6. 
**Review and Finalize**:\n - Ensure all important details are included and the invitation is error-free.\n - Make any necessary adjustments to ensure the invitation is clear, cute, and fits the unicorn theme.\n\nFinally, write the cute invitation text based on the above guidelines and provide it as your answer.", @@ -134,7 +134,7 @@ "constraints": [ { "constraint": "Create a backup plan in case it rains", - "validation_strategy": "llm" + "val_strategy": "llm" } ], "prompt_template": "Your task is to develop a backup plan in case it rains, ensuring all activities can be moved indoors. Follow these steps to accomplish your task:\n\nFirst, review the shopping list, party games, timeline, and invitation text from the previous steps to understand the party setup and activities:\n\n{{SHOPPING_LIST}}\n\n\n{{PARTY_GAMES}}\n\n\n{{PARTY_TIMELINE}}\n\n\n{{INVITATION_TEXT}}\n\n\nNext, consider the following factors for the backup plan:\n1. **Indoor Space**: Ensure that the indoor space is large enough to accommodate all the activities and guests comfortably.\n2. **Activity Adjustments**: Modify the party games and activities to fit the indoor space. Consider the following:\n - **Space Constraints**: Ensure that games like \"Pin the Horn on the Unicorn\" or \"Unicorn Ring Toss\" can be played indoors without any hazards.\n - **Noise Levels**: Adjust activities to minimize noise, especially if the party is in a residential area.\n - **Safety**: Ensure that all indoor activities are safe and age-appropriate.\n3. **Food and Drinks**: Plan how to serve food and drinks indoors. Consider using tables or a buffet-style setup to minimize clutter.\n4. **Decorations**: Adjust the decoration setup to fit the indoor space. Ensure that the decorations are still visually appealing and fit the unicorn theme.\n5. **Timeline Adjustments**: Modify the party timeline to account for indoor activities. 
Ensure that the timeline is still efficient and keeps the kids entertained.\n\nFinally, write a detailed backup plan that includes:\n- **Indoor Activity Schedule**: A list of adjusted activities with their respective times.\n- **Indoor Setup Instructions**: Detailed instructions on how to set up the indoor space, including decorations, food, and drink stations.\n- **Safety Guidelines**: Any additional safety guidelines or considerations for indoor activities.\n\nEnsure that the backup plan is clear, concise, and easy to follow. The plan should allow for a smooth transition from outdoor to indoor activities in case of rain.", @@ -152,15 +152,15 @@ "constraints": [ { "constraint": "Everything must stay within a total budget of about ~$400", - "validation_strategy": "code" + "val_strategy": "code" }, { "constraint": "Follow basic food safety guidelines especially for kids with common allergies like nuts and dairy", - "validation_strategy": "llm" + "val_strategy": "llm" }, { "constraint": "The party theme colors should be pink, purple, and gold to match the unicorn decorations", - "validation_strategy": "llm" + "val_strategy": "llm" } ], "prompt_template": "Your task is to compile the shopping list, party games, timeline, invitation text, and backup plan into a single cohesive output for the birthday party. Follow these steps to accomplish your task:\n\n1. **Review the Shopping List**:\n Carefully review the detailed shopping list with estimated costs for decorations, food, drinks, party favors, and a cake that serves at least 20 people. Ensure the total budget stays within $400.\n \n {{SHOPPING_LIST}}\n \n\n2. **Review the Party Games**:\n Review the suggested 5-7 age-appropriate party games or activities that fit the unicorn theme and can keep the kids entertained for about 3 hours.\n \n {{PARTY_GAMES}}\n \n\n3. 
**Review the Party Timeline**:\n Review the timeline for the party day starting from 2 hours before guests arrive until cleanup is done, including food prep, decoration setup, and scheduling of activities.\n \n {{PARTY_TIMELINE}}\n \n\n4. **Review the Invitation Text**:\n Review the cute invitation text that includes all the important details like date, time, location, RSVP information, and any special instructions about allergies or what kids should bring or wear.\n \n {{INVITATION_TEXT}}\n \n\n5. **Review the Backup Plan**:\n Review the backup plan in case it rains, ensuring all activities can be moved indoors.\n \n {{BACKUP_PLAN}}\n \n\n6. **Compile the Information**:\n Combine all the reviewed information into a single cohesive output. Ensure that the output is well-organized and easy to follow. Include the following sections in your final output:\n - **Shopping List**: Detailed list with estimated costs.\n - **Party Games**: Suggested activities with descriptions.\n - **Party Timeline**: Step-by-step schedule for the party day.\n - **Invitation Text**: Cute and informative text for parents.\n - **Backup Plan**: Plan for indoor activities in case of rain.\n\n7. **Final Output**:\n Present the compiled information in a clear and concise manner. 
Ensure that all details are included and that the output is easy to read and understand.\n\nYour final output should be a comprehensive guide that includes all the necessary information for planning and executing the birthday party.", diff --git a/test/decompose/test_decompose.py b/test/decompose/test_decompose.py index 883ff905f..de21d9d31 100644 --- a/test/decompose/test_decompose.py +++ b/test/decompose/test_decompose.py @@ -23,6 +23,7 @@ def test_no_dependencies(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -31,6 +32,7 @@ def test_no_dependencies(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -39,6 +41,7 @@ def test_no_dependencies(self) -> None: { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -60,6 +63,7 @@ def test_simple_linear_dependency(self) -> None: { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -68,6 +72,7 @@ def test_simple_linear_dependency(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -76,6 +81,7 @@ def test_simple_linear_dependency(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -97,6 +103,7 @@ def test_diamond_dependency(self) -> None: { "subtask": "Task D", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -105,6 +112,7 @@ def test_diamond_dependency(self) -> None: { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", 
"constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -113,6 +121,7 @@ def test_diamond_dependency(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -121,6 +130,7 @@ def test_diamond_dependency(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -142,6 +152,7 @@ def test_case_insensitive_dependencies(self) -> None: { "subtask": "Task B", "tag": "task_b", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -150,6 +161,7 @@ def test_case_insensitive_dependencies(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -170,6 +182,7 @@ def test_multiple_independent_chains(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -178,6 +191,7 @@ def test_multiple_independent_chains(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -187,6 +201,7 @@ def test_multiple_independent_chains(self) -> None: { "subtask": "Task D", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -195,6 +210,7 @@ def test_multiple_independent_chains(self) -> None: { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -219,6 +235,7 @@ def test_nonexistent_dependency_ignored(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ 
-230,6 +247,7 @@ def test_nonexistent_dependency_ignored(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -250,6 +268,7 @@ def test_renumbers_subtask_descriptions(self) -> None: { "subtask": "3. Do task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -258,6 +277,7 @@ def test_renumbers_subtask_descriptions(self) -> None: { "subtask": "2. Do task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -266,6 +286,7 @@ def test_renumbers_subtask_descriptions(self) -> None: { "subtask": "1. Do task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -287,6 +308,7 @@ def test_renumbers_only_numbered_subtasks(self) -> None: { "subtask": "2. Numbered task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -295,6 +317,7 @@ def test_renumbers_only_numbered_subtasks(self) -> None: { "subtask": "Unnumbered task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -315,6 +338,7 @@ def test_renumbers_with_complex_reordering(self) -> None: { "subtask": "4. Final task", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -323,6 +347,7 @@ def test_renumbers_with_complex_reordering(self) -> None: { "subtask": "3. Third task", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -331,6 +356,7 @@ def test_renumbers_with_complex_reordering(self) -> None: { "subtask": "2. 
Second task", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -339,6 +365,7 @@ def test_renumbers_with_complex_reordering(self) -> None: { "subtask": "1. First task", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -366,6 +393,7 @@ def test_circular_dependency_two_nodes(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -374,6 +402,7 @@ def test_circular_dependency_two_nodes(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -390,6 +419,7 @@ def test_circular_dependency_three_nodes(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -398,6 +428,7 @@ def test_circular_dependency_three_nodes(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -406,6 +437,7 @@ def test_circular_dependency_three_nodes(self) -> None: { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -422,6 +454,7 @@ def test_self_dependency(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -459,6 +492,7 @@ def test_no_input_vars_no_dependencies(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -482,6 +516,7 @@ def test_valid_input_vars(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": 
"", "constraints": [], "prompt_template": "Do A with {{ USER_INPUT }}", "input_vars_required": ["USER_INPUT"], @@ -504,6 +539,7 @@ def test_case_insensitive_input_vars(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["user_input"], # lowercase @@ -527,6 +563,7 @@ def test_valid_dependencies_in_order(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -535,6 +572,7 @@ def test_valid_dependencies_in_order(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -559,6 +597,7 @@ def test_dependencies_out_of_order_triggers_reorder(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -567,6 +606,7 @@ def test_dependencies_out_of_order_triggers_reorder(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -591,6 +631,7 @@ def test_complex_reordering(self) -> None: { "subtask": "Task D", "tag": "TASK_D", + "general_instructions": "", "constraints": [], "prompt_template": "Do D", "input_vars_required": [], @@ -599,6 +640,7 @@ def test_complex_reordering(self) -> None: { "subtask": "Task C", "tag": "TASK_C", + "general_instructions": "", "constraints": [], "prompt_template": "Do C", "input_vars_required": [], @@ -607,6 +649,7 @@ def test_complex_reordering(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -615,6 +658,7 @@ def test_complex_reordering(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", 
"constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -643,6 +687,7 @@ def test_missing_required_input_var(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["MISSING_VAR"], @@ -667,6 +712,7 @@ def test_missing_required_input_var_with_some_provided(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["VAR1", "VAR2"], @@ -691,6 +737,7 @@ def test_dependency_on_nonexistent_subtask(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -715,6 +762,7 @@ def test_circular_dependency_detected(self) -> None: { "subtask": "Task B", "tag": "TASK_B", + "general_instructions": "", "constraints": [], "prompt_template": "Do B", "input_vars_required": [], @@ -723,6 +771,7 @@ def test_circular_dependency_detected(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": [], @@ -744,6 +793,7 @@ def test_empty_input_var_list_treated_as_none(self) -> None: { "subtask": "Task A", "tag": "TASK_A", + "general_instructions": "", "constraints": [], "prompt_template": "Do A", "input_vars_required": ["REQUIRED_VAR"],