Last active
November 17, 2024 06:13
-
-
Save Maximilian-Winter/5373962ef456a2b0d1ae324fb78e623e to your computer and use it in GitHub Desktop.
Revisions
-
Maximilian-Winter revised this gist
Dec 11, 2023 . 1 changed file with 146 additions and 24 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -14,6 +14,19 @@ class PydanticDataType(Enum): """ Defines the data types supported by Pydantic. Attributes: STRING (str): Represents a string data type. BOOLEAN (str): Represents a boolean data type. INTEGER (str): Represents an integer data type. FLOAT (str): Represents a float data type. OBJECT (str): Represents an object data type. ARRAY (str): Represents an array data type. ENUM (str): Represents an enum data type. CUSTOM_CLASS (str): Represents a custom class data type. """ STRING = "string" BOOLEAN = "boolean" INTEGER = "integer" @@ -125,6 +138,21 @@ def regex_to_gbnf(regex_pattern: str) -> str: def generate_gbnf_integer_rules(max_digit=None, min_digit=None): """ Generate GBNF Integer Rules Generates GBNF (Generalized Backus-Naur Form) rules for integers based on the given maximum and minimum digits. Parameters: - max_digit (int): The maximum number of digits for the integer. Default is None. - min_digit (int): The minimum number of digits for the integer. Default is None. Returns: - integer_rule (str): The identifier for the integer rule generated. - additional_rules (list): A list of additional rules generated based on the given maximum and minimum digits. """ additional_rules = [] # Define the rule identifier based on max_digit and min_digit @@ -156,22 +184,47 @@ def generate_gbnf_integer_rules(max_digit=None, min_digit=None): return integer_rule, additional_rules def generate_gbnf_float_rules(max_digit=None, min_digit=None, max_precision=None, min_precision=None): """ Generate GBNF float rules based on the given constraints. :param max_digit: Maximum number of digits in the integer part (default: None) :param min_digit: Minimum number of digits in the integer part (default: None) :param max_precision: Maximum number of digits in the fractional part (default: None) :param min_precision: Minimum number of digits in the fractional part (default: None) :return: A tuple containing the float rule and additional rules as a list Example Usage: max_digit = 3 min_digit = 1 max_precision = 2 min_precision = 1 generate_gbnf_float_rules(max_digit, min_digit, max_precision, min_precision) Output: ('float-3-1-2-1', ['integer-part-max3-min1 ::= [0-9] [0-9] [0-9]?', 'fractional-part-max2-min1 ::= [0-9] [0-9]?', 'float-3-1-2-1 ::= integer-part-max3-min1 "." fractional-part-max2-min *1']) Note: GBNF stands for Generalized Backus-Naur Form, which is a notation technique to specify the syntax of programming languages or other formal grammars. """ additional_rules = [] # Define the integer part rule integer_part_rule = "integer-part" + (f"-max{max_digit}" if max_digit is not None else "") + ( f"-min{min_digit}" if min_digit is not None else "") # Define the fractional part rule based on precision constraints fractional_part_rule = "fractional-part" fractional_rule_part = '' if max_precision is not None or min_precision is not None: fractional_part_rule += (f"-max{max_precision}" if max_precision is not None else "") + ( f"-min{min_precision}" if min_precision is not None else "") # Minimum number of digits fractional_rule_part = '[0-9]' * (min_precision if min_precision is not None else 1) # Optional additional digits fractional_rule_part += ''.join([' [0-9]?'] * ( (max_precision - (min_precision if min_precision is not None else 1)) if max_precision is not None else 0)) additional_rules.append(f'{fractional_part_rule} ::= {fractional_rule_part}') # Define the float rule @@ -190,18 +243,31 @@ def generate_gbnf_float_rules(max_digit=None, min_digit=None, max_precision=None return float_rule, additional_rules def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, processed_models, created_rules, field_info=None) -> \ Tuple[str, list]: """ Generate GBNF rule for a given field type. :param model_name: Name of the model. :param field_name: Name of the field. :param field_type: Type of the field. :param is_optional: Whether the field is optional. :param processed_models: List of processed models. :param created_rules: List of created rules. :param field_info: Additional information about the field (optional). :return: Tuple containing the GBNF type and a list of additional rules. :rtype: Tuple[str, list] """ rules = [] field_name = format_model_and_field_name(field_name) gbnf_type = map_pydantic_type_to_gbnf(field_type) if isclass(field_type) and issubclass(field_type, BaseModel): nested_model_name = format_model_and_field_name(field_type.__name__) nested_model_rules = generate_gbnf_grammar(field_type, processed_models, created_rules) rules.extend(nested_model_rules) gbnf_type, rules = nested_model_name, rules elif isclass(field_type) and issubclass(field_type, Enum): @@ -212,7 +278,8 @@ def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, elif get_origin(field_type) == list: # Array element_type = get_args(field_type)[0] element_rule_name, additional_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-element", element_type, is_optional, processed_models, created_rules) rules.extend(additional_rules) array_rule = f"""{model_name}-{field_name} ::= "[" ws {element_rule_name} ("," ws {element_rule_name})* ws "]" """ rules.append(array_rule) @@ -224,11 +291,12 @@ def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, key_type, value_type = get_args(field_type) additional_key_type, additional_key_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-key-type", key_type, is_optional, processed_models, created_rules) additional_value_type, additional_value_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-value-type", value_type, is_optional, processed_models, created_rules) gbnf_type = fr'{gbnf_type} ::= "{{" ws ( {additional_key_type} ":" ws {additional_value_type} ("," ws {additional_key_type} ":" ws {additional_value_type})* )? "}}" ws' rules.extend(additional_key_rules) @@ -241,7 +309,7 @@ def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, if not issubclass(union_type, NoneType): union_gbnf_type, union_rules_list = generate_gbnf_rule_for_type(model_name, field_name, union_type, False, processed_models, created_rules) union_rules.append(union_gbnf_type) rules.extend(union_rules_list) @@ -263,7 +331,8 @@ def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, else: gbnf_type = PydanticDataType.STRING.value elif isclass(field_type) and issubclass(field_type, float) and field_info and hasattr(field_info, 'json_schema_extra') and field_info.json_schema_extra is not None: # Retrieve precision attributes for floats max_precision = field_info.json_schema_extra.get('max_precision') if field_info and hasattr(field_info, 'json_schema_extra') else None @@ -276,10 +345,11 @@ def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, # Generate GBNF rule for float with given attributes gbnf_type, rules = generate_gbnf_float_rules(max_digit=max_digits, min_digit=min_digits, max_precision=max_precision, min_precision=min_precision) elif isclass(field_type) and issubclass(field_type, int) and field_info and hasattr(field_info, 'json_schema_extra') and field_info.json_schema_extra is not None: # Retrieve digit attributes for integers max_digits = field_info.json_schema_extra.get('max_digit') if field_info and hasattr(field_info, 'json_schema_extra') else None @@ -304,6 +374,26 @@ def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, def generate_gbnf_grammar(model: Type[BaseModel], processed_models: set, created_rules: dict) -> list: """ Generate GBnF Grammar Generates a GBnF grammar for a given model. :param model: A Pydantic model class to generate the grammar for. Must be a subclass of BaseModel. :param processed_models: A set of already processed models to prevent infinite recursion. :param created_rules: A dict containing already created rules to prevent duplicates. :return: A list of GBnF grammar rules in string format. Example Usage: ``` model = MyModel processed_models = set() created_rules = dict() gbnf_grammar = generate_gbnf_grammar(model, processed_models, created_rules) ``` """ if model in processed_models: return [] @@ -354,6 +444,28 @@ def generate_gbnf_grammar(model: Type[BaseModel], processed_models: set, created def generate_gbnf_grammar_from_pydantic(models: List[Type[BaseModel]], root_rule_class: str = None, root_rule_content: str = None) -> str: """ Generate GBNF Grammar from Pydantic Models. This method takes a list of Pydantic models and uses them to generate a GBNF grammar string. The generated grammar string can be used for parsing and validating data using the generated * grammar. Parameters: - models (List[Type[BaseModel]]): A list of Pydantic models to generate the grammar from. - root_rule_class (str, optional): The name of the root model class. If provided, the generated grammar will have a root rule that matches the specified class. Default is None. - root_rule_content (str, optional): The content of the root model rule. This can be used to specify additional constraints or transformations for the root model. Default is None. Returns: - str: The generated GBNF grammar string. Examples: models = [UserModel, PostModel] grammar = generate_gbnf_grammar_from_pydantic(models) print(grammar) # Output: # root ::= UserModel | PostModel # ... """ processed_models = set() all_rules = [] created_rules = {} @@ -387,15 +499,23 @@ def generate_gbnf_grammar_from_pydantic(models: List[Type[BaseModel]], root_rule return "\n".join(all_rules) def get_primitive_grammar(grammar): type_list = [] if "string-list" in grammar: type_list.append(str) if "boolean-list" in grammar: type_list.append(bool) if "integer-list" in grammar: type_list.append(int) if "float-list" in grammar: type_list.append(float) additional_grammar = [generate_list_rule(t) for t in type_list] primitive_grammar = r""" boolean ::= "true" | "false" string ::= "\"" ( ([^"\\'] | escaped-char)* ) "\"" escaped-char ::= "\\" ["\\/bfnrt"] | unicode-escape unicode-escape ::= "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] ws ::= " " | "\t" | "\n" | " " ws | "\t" ws | "\n" ws fractional-part ::= [0-9]+ integer-part ::= [0-9]+ integer ::= [0-9]+""" @@ -516,7 +636,7 @@ def save_gbnf_grammar_and_documentation(grammar, documentation, grammar_file_pat documentation_file_path="./grammar_documentation.md"): try: with open(grammar_file_path, 'w') as file: file.write(grammar + get_primitive_grammar(grammar)) print(f"Grammar successfully saved to {grammar_file_path}") except IOError as e: print(f"An error occurred while saving the grammar file: {e}") @@ -548,9 +668,11 @@ def generate_and_save_gbnf_grammar_and_documentation(pydantic_model_list, gramma class YourModel(BaseModel): float_field: float = Field(default=..., description="TEST", max_precision=2, min_precision=1) integer_field: int = Field(default=..., description="TEST", max_digit=5, min_digit=3) float_field2: float = Field(default=..., description="TEST", max_digit=5, min_digit=3, max_precision=2, min_precision=1) integer_field2: int = Field(default=..., description="TEST", max_digit=5, min_digit=3) -
Maximilian-Winter revised this gist
Dec 7, 2023 . 1 changed file with 155 additions and 15 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,5 @@ import inspect import json import re import typing from inspect import isclass, getdoc @@ -123,7 +124,75 @@ def regex_to_gbnf(regex_pattern: str) -> str: return gbnf_rule def generate_gbnf_integer_rules(max_digit=None, min_digit=None): additional_rules = [] # Define the rule identifier based on max_digit and min_digit integer_rule = "integer-part" if max_digit is not None: integer_rule += f"-max{max_digit}" if min_digit is not None: integer_rule += f"-min{min_digit}" # Handling Integer Rules if max_digit is not None or min_digit is not None: # Start with an empty rule part integer_rule_part = '' # Add mandatory digits as per min_digit if min_digit is not None: integer_rule_part += '[0-9] ' * min_digit # Add optional digits up to max_digit if max_digit is not None: optional_digits = max_digit - (min_digit if min_digit is not None else 0) integer_rule_part += ''.join(['[0-9]? ' for _ in range(optional_digits)]) # Trim the rule part and append it to additional rules integer_rule_part = integer_rule_part.strip() if integer_rule_part: additional_rules.append(f'{integer_rule} ::= {integer_rule_part}') return integer_rule, additional_rules def generate_gbnf_float_rules(max_digit=None, min_digit=None, max_precision=None, min_precision=None): additional_rules = [] # Define the integer part rule integer_part_rule = "integer-part" + (f"-max{max_digit}" if max_digit is not None else "") + (f"-min{min_digit}" if min_digit is not None else "") # Define the fractional part rule based on precision constraints fractional_part_rule = "fractional-part" fractional_rule_part = '' if max_precision is not None or min_precision is not None: fractional_part_rule += (f"-max{max_precision}" if max_precision is not None else "") + (f"-min{min_precision}" if min_precision is not None else "") # Minimum number of digits fractional_rule_part = '[0-9]' * (min_precision if min_precision is not None else 1) # Optional additional digits fractional_rule_part += ''.join([' [0-9]?'] * ((max_precision - (min_precision if min_precision is not None else 1)) if max_precision is not None else 0)) additional_rules.append(f'{fractional_part_rule} ::= {fractional_rule_part}') # Define the float rule float_rule = f"float-{max_digit if max_digit is not None else 'X'}-{min_digit if min_digit is not None else 'X'}-{max_precision if max_precision is not None else 'X'}-{min_precision if min_precision is not None else 'X'}" additional_rules.append(f'{float_rule} ::= {integer_part_rule} "." {fractional_part_rule}') # Generating the integer part rule definition, if necessary if max_digit is not None or min_digit is not None: integer_rule_part = '[0-9]' if min_digit is not None and min_digit > 1: integer_rule_part += ' [0-9]' * (min_digit - 1) if max_digit is not None: integer_rule_part += ''.join([' [0-9]?'] * (max_digit - (min_digit if min_digit is not None else 1))) additional_rules.append(f'{integer_part_rule} ::= {integer_rule_part.strip()}') return float_rule, additional_rules def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, processed_models, created_rules, field_info=None) -> \ Tuple[str, list]: rules = [] @@ -193,16 +262,48 @@ def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, gbnf_type = f"pattern-{field_name} ::= {regex_to_gbnf(regex_pattern)}" else: gbnf_type = PydanticDataType.STRING.value elif isclass(field_type) and issubclass(field_type, float) and field_info and hasattr(field_info,'json_schema_extra'): # Retrieve precision attributes for floats max_precision = field_info.json_schema_extra.get('max_precision') if field_info and hasattr(field_info, 'json_schema_extra') else None min_precision = field_info.json_schema_extra.get('min_precision') if field_info and hasattr(field_info, 'json_schema_extra') else None max_digits = field_info.json_schema_extra.get('max_digit') if field_info and hasattr(field_info, 'json_schema_extra') else None min_digits = field_info.json_schema_extra.get('min_digit') if field_info and hasattr(field_info, 'json_schema_extra') else None # Generate GBNF rule for float with given attributes gbnf_type, rules = generate_gbnf_float_rules(max_digit=max_digits, min_digit=min_digits, max_precision=max_precision, min_precision=min_precision) elif isclass(field_type) and issubclass(field_type, int) and field_info and hasattr(field_info,'json_schema_extra'): # Retrieve digit attributes for integers max_digits = field_info.json_schema_extra.get('max_digit') if field_info and hasattr(field_info, 'json_schema_extra') else None min_digits = field_info.json_schema_extra.get('min_digit') if field_info and hasattr(field_info, 'json_schema_extra') else None # Generate GBNF rule for integer with given attributes gbnf_type, rules = generate_gbnf_integer_rules(max_digit=max_digits, min_digit=min_digits) else: gbnf_type, rules = gbnf_type, [] if is_optional: gbnf_type += ")?" gbnf_type = "(" + gbnf_type if gbnf_type not in created_rules: return gbnf_type, rules else: if gbnf_type in created_rules: return gbnf_type, rules def generate_gbnf_grammar(model: Type[BaseModel], processed_models: set, created_rules: dict) -> list: if model in processed_models: return [] @@ -238,7 +339,9 @@ def generate_gbnf_grammar(model: Type[BaseModel], processed_models: set) -> list is_optional = field_info.is_required is False and get_origin(field_type) is Optional rule_name, additional_rules = generate_gbnf_rule_for_type(model_name, format_model_and_field_name(field_name), field_type, is_optional, processed_models, created_rules, field_info) if rule_name not in created_rules: created_rules[rule_name] = additional_rules model_rule_parts.append(f'\"\\\"{field_name}\\\"\" ":" ws {rule_name}') # Adding escaped quotes nested_rules.extend(additional_rules) @@ -253,11 +356,11 @@ def generate_gbnf_grammar_from_pydantic(models: List[Type[BaseModel]], root_rule root_rule_content: str = None) -> str: processed_models = set() all_rules = [] created_rules = {} if root_rule_class is None: for model in models: model_rules = generate_gbnf_grammar(model, processed_models, created_rules) all_rules.extend(model_rules) root_rule = "root ::= " + " | ".join([format_model_and_field_name(model.__name__) for model in models]) @@ -278,7 +381,7 @@ def generate_gbnf_grammar_from_pydantic(models: List[Type[BaseModel]], root_rule mod_rules.append(mod_rule) grammar_model_rules += "\n" + "\n".join(mod_rules) for model in models: model_rules = generate_gbnf_grammar(model, processed_models, created_rules) all_rules.extend(model_rules) all_rules.insert(0, root_rule + model_rule + grammar_model_rules) return "\n".join(all_rules) @@ -292,7 +395,9 @@ def get_primitive_grammar(): boolean ::= "true" | "false" string ::= "\"" ( [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) )* "\"" ws float ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws ws ::= " " fractional-part ::= [0-9]+ integer-part ::= [0-9]+ integer ::= [0-9]+""" return "\n" + '\n'.join(additional_grammar) + primitive_grammar @@ -337,10 +442,21 @@ def generate_markdown_report(pydantic_models: List[Type[BaseModel]]) -> str: return markdown def format_json_example(example: dict, depth: int) -> str: indent = ' ' * depth formatted_example = '{\n' for key, value in example.items(): value_text = f"'{value}'" if isinstance(value, str) else value formatted_example += f"{indent}{key}: {value_text},\n" formatted_example = formatted_example.rstrip(',\n') + '\n' + indent + '}' return formatted_example def generate_text_documentation(pydantic_models: List[Type[BaseModel]], model_prefix="Model", fields_prefix="Fields") -> str: documentation = "" for model in pydantic_models: documentation += f"{model_prefix}: {format_model_and_field_name(model.__name__)}\n" # Handling multi-line model description with proper indentation documentation += " Description: " @@ -350,11 +466,18 @@ def generate_text_documentation(pydantic_models: List[Type[BaseModel]]) -> str: documentation += "\n" + format_multiline_description(class_description, 2) + "\n\n" # Indenting the fields section documentation += f" {fields_prefix}:\n" if isclass(model) and issubclass(model, BaseModel): for name, field_type in model.__annotations__.items(): documentation += generate_field_text(name, field_type, model) documentation += "\n" if hasattr(model, 'Config') and hasattr(model.Config, 'json_schema_extra') and 'example' in model.Config.json_schema_extra: documentation += f" Expected Example Output for {format_model_and_field_name(model.__name__)}:\n" json_example = json.dumps(model.Config.json_schema_extra['example']) documentation += format_multiline_description(json_example, 2) + "\n" return documentation @@ -363,10 +486,18 @@ def generate_field_text(field_name: str, field_type: Type[Any], model: Type[Base field_text = f"{indent}{field_name} ({field_type.__name__}): \n" field_info = model.model_fields.get(field_name) field_description = field_info.description if field_info else "No description available." # Handling multi-line field description with proper indentation field_text += f"{indent} Description: " + field_description + "\n" # Check for and include field-specific examples if available if hasattr(model, 'Config') and hasattr(model.Config, 'json_schema_extra') and 'example' in model.Config.json_schema_extra: field_example = model.Config.json_schema_extra['example'].get(field_name) if field_example is not None: example_text = f"'{field_example}'" if isinstance(field_example, str) else field_example field_text += f"{indent} Example: {example_text}\n" if isclass(field_type) and issubclass(field_type, BaseModel): field_text += f"{indent} Details:\n" @@ -408,13 +539,22 @@ def remove_empty_lines(string): def generate_and_save_gbnf_grammar_and_documentation(pydantic_model_list, grammar_file_path="./generated_grammar.gbnf", documentation_file_path="./generated_grammar_documentation.md", root_rule_class: str = None, root_rule_content: str = None): documentation = generate_text_documentation(pydantic_model_list, "Output Model", "Output Fields") grammar = generate_gbnf_grammar_from_pydantic(pydantic_model_list, root_rule_class, root_rule_content) grammar = remove_empty_lines(grammar) print(grammar) save_gbnf_grammar_and_documentation(grammar, documentation, grammar_file_path, documentation_file_path) class YourModel(BaseModel): float_field: float = Field(default=..., description="TEST", max_precision=2, min_precision=1) integer_field: int = Field(default=..., description="TEST",max_digit=5, min_digit=3) float_field2: float = Field(default=..., description="TEST",max_digit=5, min_digit=3, max_precision=2, min_precision=1) integer_field2: int = Field(default=..., description="TEST",max_digit=5, min_digit=3) from pydantic import BaseModel, Field from typing import List, Optional from enum import Enum -
Maximilian-Winter revised this gist
Dec 6, 2023 . 1 changed file with 99 additions and 50 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,6 +2,7 @@ import re import typing from inspect import isclass, getdoc from types import NoneType from pydantic import BaseModel, Field from pydantic.fields import FieldInfo @@ -168,17 +169,23 @@ def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, union_rules = [] for union_type in union_types: if not issubclass(union_type, NoneType): union_gbnf_type, union_rules_list = generate_gbnf_rule_for_type(model_name, field_name, union_type, False, processed_models) union_rules.append(union_gbnf_type) rules.extend(union_rules_list) # Defining the union grammar rule separately if len(union_rules) == 1: union_grammar_rule = f"{model_name}-{field_name}-optional ::= ({' | '.join(union_rules)})?" else: union_grammar_rule = f"{model_name}-{field_name}-union ::= {' | '.join(union_rules)}" rules.append(union_grammar_rule) if len(union_rules) == 1: gbnf_type = f"{model_name}-{field_name}-optional" else: gbnf_type = f"{model_name}-{field_name}-union" elif isclass(field_type) and issubclass(field_type, str): if field_info and hasattr(field_info, 'pattern'): # Convert regex pattern to grammar rule @@ -229,7 +236,8 @@ def generate_gbnf_grammar(model: Type[BaseModel], processed_models: set) -> list field_type = field_info field_info = model.model_fields[field_name] is_optional = field_info.is_required is False and get_origin(field_type) is Optional rule_name, additional_rules = generate_gbnf_rule_for_type(model_name, format_model_and_field_name(field_name), field_type, is_optional, processed_models, field_info) model_rule_parts.append(f'\"\\\"{field_name}\\\"\" ":" ws {rule_name}') # Adding escaped quotes nested_rules.extend(additional_rules) @@ -266,7 +274,7 @@ def generate_gbnf_grammar_from_pydantic(models: List[Type[BaseModel]], root_rule mod_rules = [] for model in models: mod_rule = fr'{format_model_and_field_name(model.__name__)}-grammar-model ::= ' mod_rule += fr'"\"{format_model_and_field_name(model.__name__)}\"" "," "\"{root_rule_content}\"" ":" {format_model_and_field_name(model.__name__)}' + '\n' mod_rules.append(mod_rule) grammar_model_rules += "\n" + "\n".join(mod_rules) for model in models: @@ -311,19 +319,68 @@ def generate_field_markdown(field_name: str, field_type: Type[Any], model: Type[ def generate_markdown_report(pydantic_models: List[Type[BaseModel]]) -> str: markdown = "" for model in pydantic_models: markdown += f"### {format_model_and_field_name(model.__name__)}\n" # Check if the model's docstring is different from BaseModel's docstring class_doc = getdoc(model) base_class_doc = getdoc(BaseModel) class_description = class_doc if class_doc and class_doc != base_class_doc else "No specific description available." markdown += f"{class_description}\n\n" markdown += "#### Fields\n" if isclass(model) and issubclass(model, BaseModel): for name, field_type in model.__annotations__.items(): markdown += generate_field_markdown(format_model_and_field_name(name), field_type, model) markdown += "\n" return markdown def generate_text_documentation(pydantic_models: List[Type[BaseModel]]) -> str: documentation = "" for model in pydantic_models: documentation += f"Model: {format_model_and_field_name(model.__name__)}\n" # Handling multi-line model description with proper indentation documentation += " Description: " class_doc = getdoc(model) base_class_doc = getdoc(BaseModel) class_description = class_doc if class_doc and class_doc != base_class_doc else "No specific description available." documentation += "\n" + format_multiline_description(class_description, 2) + "\n\n" # Indenting the fields section documentation += " Fields:\n" if isclass(model) and issubclass(model, BaseModel): for name, field_type in model.__annotations__.items(): documentation += generate_field_text(name, field_type, model) documentation += "\n" return documentation def generate_field_text(field_name: str, field_type: Type[Any], model: Type[BaseModel], depth=1) -> str: indent = ' ' * depth field_text = f"{indent}{field_name} ({field_type.__name__}): \n" field_info = model.model_fields.get(field_name) field_description = field_info.description if field_info and field_info.description else "No description available." # Handling multi-line field description with proper indentation field_text += f"{indent} Description: \n" + format_multiline_description(field_description, depth + 2) + "\n" if isclass(field_type) and issubclass(field_type, BaseModel): field_text += f"{indent} Details:\n" for name, type_ in field_type.__annotations__.items(): field_text += generate_field_text(name, type_, field_type, depth + 2) return field_text def format_multiline_description(description: str, indent_level: int) -> str: indent = ' ' * indent_level return indent + description.replace('\n', '\n' + indent) def save_gbnf_grammar_and_documentation(grammar, documentation, grammar_file_path="./grammar.gbnf", documentation_file_path="./grammar_documentation.md"): try: @@ -351,7 +408,7 @@ def remove_empty_lines(string): def generate_and_save_gbnf_grammar_and_documentation(pydantic_model_list, grammar_file_path="./generated_grammar.gbnf", documentation_file_path="./generated_grammar_documentation.md", root_rule_class: str = None, root_rule_content: str = None): documentation = generate_text_documentation(pydantic_model_list) grammar = generate_gbnf_grammar_from_pydantic(pydantic_model_list, root_rule_class, root_rule_content) grammar = remove_empty_lines(grammar) print(grammar) @@ -378,7 +435,6 @@ class SkillSet: class ComplexEmployeeModel: employee_id: int name: str = Field(..., description="Name of the employee.") department: Department = Field(..., description="Department of the employee.") @@ -390,9 +446,7 @@ class ComplexEmployeeModel: # Cmd Command Model class CmdCommandModel(BaseModel): """ A model for executing CMD commands in a Large Language Model setting. """ inner_thoughts: str = Field(..., description="Your inner thoughts or inner monologue while writing the command.") command: str = Field(..., description="The CMD command to execute.") @@ -404,7 +458,7 @@ class CmdCommandModel(BaseModel): class WebBrowsingModel(BaseModel): """ A model designed for handling web browsing operations in a Large Language Model context. It accommodates the thought process in crafting the URL and includes a mechanism for sequential control through a heartbeat feature. """ @@ -418,7 +472,7 @@ class WebBrowsingModel(BaseModel): class WebDownloadModel(BaseModel): """ A model for managing web content downloads in a Large Language Model setting. It captures the considerations in selecting the URL and download path, and supports chained execution via a heartbeat mechanism. """ inner_thoughts: str = Field(..., description="Your inner thoughts or inner monologue while writing the url.") @@ -432,7 +486,7 @@ class WebDownloadModel(BaseModel): class PythonInterpreterCommandModel(BaseModel): """ A model for executing Python commands in a Large Language Model framework. It incorporates the thought process during command creation and enables sequential task execution with a heartbeat mechanism. """ inner_thoughts: str = Field(..., description="Your inner thoughts or inner monologue while writing the command.") @@ -444,9 +498,7 @@ class PythonInterpreterCommandModel(BaseModel): # Write File Section Model class WriteFileSectionModel(BaseModel): """ A model for writing or modifying a section in a file in a Large Language Model setting. """ chain_of_thought: str = Field(..., description="Detailed, step-by-step reasoning for the actions to be performed, ensuring clarity in the task execution process.") @@ -467,8 +519,7 @@ class WriteFileSectionModel(BaseModel): # Read File Model class ReadFileModel(BaseModel): """ A model for reading files in a Large Language Model setting. """ folder: str = Field(None, description="Path to the folder containing the file.") file_name: str = Field(..., @@ -480,8 +531,7 @@ class ReadFileModel(BaseModel): # File List Model class FileListModel(BaseModel): """ A model for listing files in a directory in a Large Language Model setting. """ folder: str = Field(..., description="Path to the directory where files will be listed. This path can include subdirectories to be scanned.") @@ -491,8 +541,7 @@ class FileListModel(BaseModel): class AddCoreMemoryModel(BaseModel): """ A model for adding new entries to the core memory of a Large Language Model. """ key: str = Field(..., description="The key identifier for the core memory entry.") field: str = Field(..., description="A secondary key or field within the core memory entry.") @@ -505,7 +554,6 @@ class AddCoreMemoryModel(BaseModel): class ReplaceCoreMemoryModel(BaseModel): """ A model for replacing specific fields in the core memory of a Large Language Model. """ key: str = Field(..., description="The key identifier for the core memory entry.") field: str = Field(..., description="The specific field within the core memory entry to be replaced.") @@ -518,8 +566,7 @@ class ReplaceCoreMemoryModel(BaseModel): # Remove Core Memory Model class RemoveCoreMemoryModel(BaseModel): """ A model for removing specific fields from the core memory of a Large Language Model. """ key: str = Field(..., description="The key identifier for the core memory entry to be removed.") field: str = Field(..., description="The specific field within the core memory entry to be removed.") @@ -531,7 +578,7 @@ class RemoveCoreMemoryModel(BaseModel): class RolesEnum(str, Enum): EVENT_MEMORY_SEARCH = "Event-Memory-Search" KNOWLEDGE_MEMORY_SEARCH = "Knowledge-Memory-Search" MESSAGE_FROM_SWARM = "Message-From-Swarm" MESSAGE_FROM_USER = "Message-From-User" SYSTEM_MESSAGE = "System-Message" @@ -540,7 +587,6 @@ class RolesEnum(str, Enum): class SearchEventMemoryModel(BaseModel): """ A model for searching event memories in a Large Language Model. """ event_types: List[RolesEnum] = Field(..., description="Array of event types to filter the search.") start_date: str = Field(..., description="The starting date for the event search range.") @@ -553,8 +599,7 @@ class SearchEventMemoryModel(BaseModel): # Search Knowledge Model class SearchKnowledgeModel(BaseModel): """ A model for searching knowledge memories in a Large Language Model. """ query: str = Field(..., description="The query string to search within the 'Knowledge-Memory'.") request_heartbeat: bool = Field(..., @@ -564,8 +609,7 @@ class SearchKnowledgeModel(BaseModel): # Connect Knowledge Memories Model class ConnectKnowledgeMemoriesModel(BaseModel): """ A model for connecting knowledge memories in a Large Language Model. """ request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") @@ -574,19 +618,24 @@ class ConnectKnowledgeMemoriesModel(BaseModel): # Self Reflect Model class SelfReflectModel(BaseModel): """ A model for enabling self-reflection in a Large Language Model. """ request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") class SendMessageToUser(BaseModel): """ A model for sending messages to the user in an AI LLM agent swarm. """ chain_of_thought: str = Field(..., description="Your inner thoughts or chain of thoughts while writing the message to the user.") message: str = Field(..., description="Message you want to send to the user.") generate_and_save_gbnf_grammar_and_documentation( [SendMessageToUser, CmdCommandModel, WebBrowsingModel, PythonInterpreterCommandModel, WriteFileSectionModel, ReadFileModel, FileListModel, AddCoreMemoryModel, ReplaceCoreMemoryModel, RemoveCoreMemoryModel], root_rule_class="function", root_rule_content="function-parameters") -
Maximilian-Winter revised this gist
Dec 6, 2023 . 1 changed file with 80 additions and 33 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,6 +8,8 @@ from typing import Any, Type, List, get_args, get_origin, Tuple, Union, Optional from enum import Enum import re class PydanticDataType(Enum): STRING = "string" @@ -32,15 +34,22 @@ def map_pydantic_type_to_gbnf(pydantic_type: Type[Any]) -> str: elif isclass(pydantic_type) and issubclass(pydantic_type, Enum): return PydanticDataType.ENUM.value elif isclass(pydantic_type) and issubclass(pydantic_type, BaseModel): return format_model_and_field_name(pydantic_type.__name__) elif get_origin(pydantic_type) == list: element_type = get_args(pydantic_type)[0] return f"{map_pydantic_type_to_gbnf(element_type)}-list" elif get_origin(pydantic_type) == Union: union_types = get_args(pydantic_type) union_rules = [map_pydantic_type_to_gbnf(ut) for ut in union_types] return f"union-{'-or-'.join(union_rules)}" elif get_origin(pydantic_type) == Optional: element_type = get_args(pydantic_type)[0] return f"optional-{map_pydantic_type_to_gbnf(element_type)}" elif isclass(pydantic_type): return f"{PydanticDataType.CUSTOM_CLASS.value}-{format_model_and_field_name(pydantic_type.__name__)}" elif get_origin(pydantic_type) == dict: key_type, value_type = get_args(pydantic_type) return f"custom-dict-key-type-{format_model_and_field_name(map_pydantic_type_to_gbnf(key_type))}-value-type-{format_model_and_field_name(map_pydantic_type_to_gbnf(value_type))}" else: return "unknown" @@ -52,18 +61,6 @@ def format_model_and_field_name(model_name: str) -> str: return '-'.join(part.lower().replace("_", "-") for part in parts) def generate_list_rule(element_type): """ Generate a GBNF rule for a list of a given element type. @@ -108,20 +105,29 @@ def get_members_structure(cls, rule_name): return result, type_list_rules def regex_to_gbnf(regex_pattern: str) -> str: """ Translate a basic regex pattern to a GBNF rule. Note: This function handles only a subset of simple regex patterns. """ gbnf_rule = regex_pattern # Translate common regex components to GBNF gbnf_rule = gbnf_rule.replace('\\d', '[0-9]') gbnf_rule = gbnf_rule.replace('\\s', '[ \t\n]') # Handle quantifiers and other regex syntax that is similar in GBNF # (e.g., '*', '+', '?', character classes) return gbnf_rule def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, processed_models, field_info=None) -> \ Tuple[str, list]: rules = [] field_name = format_model_and_field_name(field_name) gbnf_type = map_pydantic_type_to_gbnf(field_type) if isclass(field_type) and issubclass(field_type, BaseModel): nested_model_name = format_model_and_field_name(field_type.__name__) @@ -144,12 +150,48 @@ def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, elif gbnf_type.startswith("custom-class-"): nested_model_rules, field_types = get_members_structure(field_type, gbnf_type) rules.append(nested_model_rules) elif gbnf_type.startswith("custom-dict-"): key_type, value_type = get_args(field_type) additional_key_type, additional_key_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-key-type", key_type, is_optional, processed_models) additional_value_type, additional_value_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-value-type", value_type, is_optional, processed_models) gbnf_type = fr'{gbnf_type} ::= "{{" ws ( {additional_key_type} ":" ws {additional_value_type} ("," ws {additional_key_type} ":" ws {additional_value_type})* )? "}}" ws' rules.extend(additional_key_rules) rules.extend(additional_value_rules) elif gbnf_type.startswith("union-"): union_types = get_args(field_type) union_rules = [] for union_type in union_types: union_gbnf_type, union_rules_list = generate_gbnf_rule_for_type(model_name, field_name, union_type, False, processed_models) union_rules.append(union_gbnf_type) rules.extend(union_rules_list) # Defining the union grammar rule separately union_grammar_rule = f"{model_name}-{field_name}-union ::= {' | '.join(union_rules)}" rules.append(union_grammar_rule) # Referencing the union rule in the main model rule gbnf_type = f"{model_name}-{field_name}-union" elif isclass(field_type) and issubclass(field_type, str): if field_info and hasattr(field_info, 'pattern'): # Convert regex pattern to grammar rule regex_pattern = field_info.regex.pattern gbnf_type = f"pattern-{field_name} ::= {regex_to_gbnf(regex_pattern)}" else: gbnf_type = PydanticDataType.STRING.value else: gbnf_type, rules = gbnf_type, [] if is_optional: gbnf_type += ")?" gbnf_type = "(" + gbnf_type return gbnf_type, rules @@ -186,9 +228,9 @@ def generate_gbnf_grammar(model: Type[BaseModel], processed_models: set) -> list else: field_type = field_info field_info = model.model_fields[field_name] is_optional = field_info.is_required is False and get_origin(field_type) is Optional rule_name, additional_rules = generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, processed_models, field_info) model_rule_parts.append(f'\"\\\"{field_name}\\\"\" ":" ws {rule_name}') # Adding escaped quotes nested_rules.extend(additional_rules) @@ -539,7 +581,12 @@ class SelfReflectModel(BaseModel): description="Set this to true to get control back after execution, to chain functions together.") # generate_and_save_gbnf_grammar_and_documentation( # [PythonInterpreterCommandModel, WebBrowsingModel, WebDownloadModel], root_rule_class="function", # root_rule_content="function") generate_and_save_gbnf_grammar_and_documentation( [CmdCommandModel, WebBrowsingModel, PythonInterpreterCommandModel, WriteFileSectionModel, ReadFileModel, FileListModel, AddCoreMemoryModel, ReplaceCoreMemoryModel, RemoveCoreMemoryModel, SearchEventMemoryModel, SearchKnowledgeModel, ConnectKnowledgeMemoriesModel, SelfReflectModel], root_rule_class="function", root_rule_content="function") -
Maximilian-Winter revised this gist
Dec 6, 2023 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -542,4 +542,4 @@ class SelfReflectModel(BaseModel): generate_and_save_gbnf_grammar_and_documentation( [CmdCommandModel, WebBrowsingModel, PythonInterpreterCommandModel, WriteFileSectionModel, ReadFileModel, FileListModel, AddCoreMemoryModel, ReplaceCoreMemoryModel, RemoveCoreMemoryModel, SearchEventMemoryModel, SearchKnowledgeModel, ConnectKnowledgeMemoriesModel, SelfReflectModel], root_rule_class="function", root_rule_content="function") -
Maximilian-Winter created this gist
Dec 6, 2023 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,545 @@ import inspect import re import typing from inspect import isclass, getdoc from pydantic import BaseModel, Field from pydantic.fields import FieldInfo from typing import Any, Type, List, get_args, get_origin, Tuple, Union, Optional from enum import Enum class PydanticDataType(Enum): STRING = "string" BOOLEAN = "boolean" INTEGER = "integer" FLOAT = "float" OBJECT = "object" ARRAY = "array" ENUM = "enum" CUSTOM_CLASS = "custom-class" def map_pydantic_type_to_gbnf(pydantic_type: Type[Any]) -> str: if isclass(pydantic_type) and issubclass(pydantic_type, str): return PydanticDataType.STRING.value elif isclass(pydantic_type) and issubclass(pydantic_type, bool): return PydanticDataType.BOOLEAN.value elif isclass(pydantic_type) and issubclass(pydantic_type, int): return PydanticDataType.INTEGER.value elif isclass(pydantic_type) and issubclass(pydantic_type, float): return PydanticDataType.FLOAT.value elif isclass(pydantic_type) and issubclass(pydantic_type, Enum): return PydanticDataType.ENUM.value elif isclass(pydantic_type) and issubclass(pydantic_type, BaseModel): return format_model_and_field_name(pydantic_type.__name__.lower()) elif get_origin(pydantic_type) == list: element_type = get_args(pydantic_type)[0] return f"{map_pydantic_type_to_gbnf(element_type)}-list" elif pydantic_type == Optional: element_type = get_args(pydantic_type)[0] return f"{map_pydantic_type_to_gbnf(element_type)}" elif isclass(pydantic_type): return f"{PydanticDataType.CUSTOM_CLASS.value}-{format_model_and_field_name(pydantic_type.__name__.lower())}" else: return "unknown" def format_model_and_field_name(model_name: str) -> str: parts = re.findall('[A-Z][^A-Z]*', model_name) if not parts: # Check if the list is empty return model_name.lower().replace("_", "-") return '-'.join(part.lower().replace("_", "-") for part in parts) from pydantic import BaseModel, Field from pydantic.fields import FieldInfo def generate_type_dict(type_dict: typing.Dict[str, inspect.Parameter]) -> typing.Dict[str, Type]: pass def generate_pydantic_field_dict(type_dict: typing.Dict[str, Type]) -> typing.Dict[str, FieldInfo]: pass def generate_list_rule(element_type): """ Generate a GBNF rule for a list of a given element type. :param element_type: The type of the elements in the list (e.g., 'string'). :return: A string representing the GBNF rule for a list of the given type. """ rule_name = f"{map_pydantic_type_to_gbnf(element_type)}-list" element_rule = map_pydantic_type_to_gbnf(element_type) list_rule = f"{rule_name} ::= \"[\" ws ( {element_rule} (\",\" ws {element_rule})* )? \"]\"" return list_rule def get_members_structure(cls, rule_name): if issubclass(cls, Enum): # Handle Enum types members = [f'\"\\\"{member.value}\\\"\"' for name, member in cls.__members__.items()] return f"{cls.__name__.lower()} ::= " + " | ".join(members) if cls.__annotations__ and cls.__annotations__ != {}: result = f'{rule_name} ::= "{{"' type_list_rules = [] # Modify this comprehension members = [f' ws \"\\\"{name}\\\"\" ws ":" ws {map_pydantic_type_to_gbnf(param_type)}' for name, param_type in cls.__annotations__.items() if name != 'self'] result += '", "'.join(members) result += ' ws "}"' return result, type_list_rules else: init_signature = inspect.signature(cls.__init__) parameters = init_signature.parameters result = f'{cls.__name__.lower()} ::= "{{"' type_list_rules = [] # Modify this comprehension too members = [f' ws \"\\\"{name}\\\"\" ws ":" ws {map_pydantic_type_to_gbnf(param.annotation)}' for name, param in parameters.items() if name != 'self' and param.annotation != inspect.Parameter.empty] result += '", "'.join(members) result += ' ws "}"' return result, type_list_rules def generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, processed_models) -> Tuple[str, list]: rules = [] gbnf_type = "" field_name = format_model_and_field_name(field_name) if get_origin(field_type) == Union: union_types = get_args(field_type) union_rules = [map_pydantic_type_to_gbnf(t) for t in union_types if t is not type(None)] if len(union_rules) == 1: gbnf_type = f"{union_rules[0]} ?" field_type = get_args(field_type)[0] else: gbnf_type = f"({' | '.join(union_rules)})" else: gbnf_type = map_pydantic_type_to_gbnf(field_type) if isclass(field_type) and issubclass(field_type, BaseModel): nested_model_name = format_model_and_field_name(field_type.__name__) nested_model_rules = generate_gbnf_grammar(field_type, processed_models) rules.extend(nested_model_rules) gbnf_type, rules = nested_model_name, rules elif isclass(field_type) and issubclass(field_type, Enum): enum_values = [f'\"\\\"{e.value}\\\"\"' for e in field_type] # Adding escaped quotes enum_rule = f"{model_name}-{field_name} ::= {' | '.join(enum_values)}" rules.append(enum_rule) gbnf_type, rules = model_name + "-" + field_name, rules elif get_origin(field_type) == list: # Array element_type = get_args(field_type)[0] element_rule_name, additional_rules = generate_gbnf_rule_for_type(model_name, f"{field_name}-element", element_type, is_optional, processed_models) rules.extend(additional_rules) array_rule = f"""{model_name}-{field_name} ::= "[" ws {element_rule_name} ("," ws {element_rule_name})* ws "]" """ rules.append(array_rule) gbnf_type, rules = model_name + "-" + field_name, rules elif gbnf_type.startswith("custom-class-"): nested_model_rules, field_types = get_members_structure(field_type, gbnf_type) rules.append(nested_model_rules) else: gbnf_type, rules = gbnf_type, [] if is_optional: gbnf_type += "?" return gbnf_type, rules def generate_gbnf_grammar(model: Type[BaseModel], processed_models: set) -> list: if model in processed_models: return [] processed_models.add(model) model_name = format_model_and_field_name(model.__name__) model_fields = {} if not issubclass(model, BaseModel): # For non-Pydantic classes, generate model_fields from __annotations__ or __init__ if hasattr(model, '__annotations__') and model.__annotations__: model_fields = {name: (typ, ...) for name, typ in model.__annotations__.items()} else: init_signature = inspect.signature(model.__init__) parameters = init_signature.parameters model_fields = {name: (param.annotation, param.default) for name, param in parameters.items() if name != 'self'} else: # For Pydantic models, use model_fields and check for ellipsis (required fields) model_fields = model.__annotations__ model_rule_parts = [] nested_rules = [] for field_name, field_info in model_fields.items(): if not issubclass(model, BaseModel): field_type, default_value = field_info # Check if the field is optional (not required) is_optional = (default_value is not inspect.Parameter.empty) and (default_value is not Ellipsis) else: field_type = field_info field_info = model.model_fields[field_name] is_optional = field_info.is_required is False rule_name, additional_rules = generate_gbnf_rule_for_type(model_name, field_name, field_type, is_optional, processed_models) model_rule_parts.append(f'\"\\\"{field_name}\\\"\" ":" ws {rule_name}') # Adding escaped quotes nested_rules.extend(additional_rules) fields_joined = ' ws ", " ws '.join(model_rule_parts) model_rule = f'{model_name} ::= "{{" ws {fields_joined} ws "}}"' all_rules = [model_rule] + nested_rules return all_rules def generate_gbnf_grammar_from_pydantic(models: List[Type[BaseModel]], root_rule_class: str = None, root_rule_content: str = None) -> str: processed_models = set() all_rules = [] if root_rule_class is None: for model in models: model_rules = generate_gbnf_grammar(model, processed_models) all_rules.extend(model_rules) root_rule = "root ::= " + " | ".join([format_model_and_field_name(model.__name__) for model in models]) all_rules.insert(0, root_rule) return "\n".join(all_rules) elif root_rule_class is not None: root_rule = f"root ::= {format_model_and_field_name(root_rule_class)}\n" model_rule = fr'{format_model_and_field_name(root_rule_class)} ::= "{{" ws "\"{root_rule_class}\"" ":" ws grammar-models ws "}}"' fields_joined = " | ".join( [fr'{format_model_and_field_name(model.__name__)}-grammar-model' for model in models]) grammar_model_rules = f'\ngrammar-models ::= {fields_joined}' mod_rules = [] for model in models: mod_rule = fr'{format_model_and_field_name(model.__name__)}-grammar-model ::= ' mod_rule += fr'"\"{model.__name__}\"" "," "\"{root_rule_content}\"" ":" {format_model_and_field_name(model.__name__)}' + '\n' mod_rules.append(mod_rule) grammar_model_rules += "\n" + "\n".join(mod_rules) for model in models: model_rules = generate_gbnf_grammar(model, processed_models) all_rules.extend(model_rules) all_rules.insert(0, root_rule + model_rule + grammar_model_rules) return "\n".join(all_rules) def get_primitive_grammar(): type_list = [str, bool, float, int] additional_grammar = [generate_list_rule(t) for t in type_list] primitive_grammar = r""" boolean ::= "true" | "false" string ::= "\"" ( [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) )* "\"" ws float ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws ws ::= "" integer ::= [0-9]+""" return "\n" + '\n'.join(additional_grammar) + primitive_grammar def generate_field_markdown(field_name: str, field_type: Type[Any], model: Type[BaseModel], depth=1) -> str: indent = ' ' * depth field_markdown = f"{indent}- **{field_name}** (`{field_type.__name__}`): " # Extracting field description from Pydantic Field using __model_fields__ field_info = model.model_fields.get(field_name) field_description = field_info.description if field_info and field_info.description else "No description available." field_markdown += field_description + '\n' # Handling nested BaseModel fields if isclass(field_type) and issubclass(field_type, BaseModel): field_markdown += f"{indent} - Details:\n" for name, type_ in field_type.__annotations__.items(): field_markdown += generate_field_markdown(name, type_, field_type, depth + 2) return field_markdown def generate_markdown_report(pydantic_models: List[Type[BaseModel]]) -> str: markdown = "" for model in pydantic_models: markdown += f"## {model.__name__}\n" class_doc = getdoc(model) or "No description available." markdown += f"{class_doc}\n\n" markdown += "### Fields\n" if isclass(model) and issubclass(model, BaseModel): for name, field_type in model.__annotations__.items(): markdown += generate_field_markdown(name, field_type, model) markdown += "\n" return markdown def save_gbnf_grammar_and_documentation(grammar, documentation, grammar_file_path="./grammar.gbnf", documentation_file_path="./grammar_documentation.md"): try: with open(grammar_file_path, 'w') as file: file.write(grammar + get_primitive_grammar()) print(f"Grammar successfully saved to {grammar_file_path}") except IOError as e: print(f"An error occurred while saving the grammar file: {e}") try: with open(documentation_file_path, 'w') as file: file.write(documentation) print(f"Documentation successfully saved to {documentation_file_path}") except IOError as e: print(f"An error occurred while saving the documentation file: {e}") def remove_empty_lines(string): lines = string.splitlines() non_empty_lines = [line for line in lines if line.strip() != ""] string_no_empty_lines = "\n".join(non_empty_lines) return string_no_empty_lines def generate_and_save_gbnf_grammar_and_documentation(pydantic_model_list, grammar_file_path="./generated_grammar.gbnf", documentation_file_path="./generated_grammar_documentation.md", root_rule_class: str = None, root_rule_content: str = None): documentation = generate_markdown_report(pydantic_model_list) grammar = generate_gbnf_grammar_from_pydantic(pydantic_model_list, root_rule_class, root_rule_content) grammar = remove_empty_lines(grammar) print(grammar) save_gbnf_grammar_and_documentation(grammar, documentation, grammar_file_path, documentation_file_path) from pydantic import BaseModel, Field from typing import List, Optional from enum import Enum class Department(Enum): """Enum for department names.""" HR = 'Human Resources' IT = 'Information Technology' SALES = 'Sales' MARKETING = 'Marketing' class SkillSet: """Skillset of the employee.""" primary_skill: str = Field(..., description="Primary skill of the employee.") secondary_skills: List[str] = Field(..., description="List of secondary skills.") class ComplexEmployeeModel: """Detailed employee model.""" employee_id: int name: str = Field(..., description="Name of the employee.") department: Department = Field(..., description="Department of the employee.") skill_set: SkillSet = Field(..., description="Skillset of the employee.") experience_years: float = Field(..., description="Years of experience.") is_full_time: bool = Field(True, description="Is the employee full-time.") # Cmd Command Model class CmdCommandModel(BaseModel): """ A model for executing CMD commands within a Large Language Model environment. It captures the user's inner thoughts during command formulation and supports function chaining through a heartbeat mechanism. """ inner_thoughts: str = Field(..., description="Your inner thoughts or inner monologue while writing the command.") command: str = Field(..., description="The CMD command to execute.") require_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Web Browsing Model class WebBrowsingModel(BaseModel): """ A model designed for handling web browsing operations in a Large Language Model context. It accommodates the user's thought process in crafting the URL and includes a mechanism for sequential control through a heartbeat feature. """ inner_thoughts: str = Field(..., description="Your inner thoughts or inner monologue while writing the url.") URL: str = Field(..., description="The URL you want to access.") require_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Web Download Model class WebDownloadModel(BaseModel): """ A model for managing web content downloads in a Large Language Model setting. It captures the user's considerations in selecting the URL and download path, and supports chained execution via a heartbeat mechanism. """ inner_thoughts: str = Field(..., description="Your inner thoughts or inner monologue while writing the url.") URL: str = Field(..., description="The URL you want to download.") Path: str = Field(..., description="The Path you want to download the file to.") require_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Python Interpreter Command Model class PythonInterpreterCommandModel(BaseModel): """ A model for executing Python commands in a Large Language Model framework. It incorporates the user's thought process during command creation and enables sequential task execution with a heartbeat mechanism. """ inner_thoughts: str = Field(..., description="Your inner thoughts or inner monologue while writing the command.") command: str = Field(..., description="The Python command to execute.") require_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Write File Section Model class WriteFileSectionModel(BaseModel): """ A model to facilitate writing to specific sections of a file in a Large Language Model. It includes detailed reasoning for the writing process and supports chaining of write operations with a heartbeat feature. """ chain_of_thought: str = Field(..., description="Detailed, step-by-step reasoning for the actions to be performed, ensuring clarity in the task execution process.") folder: str = Field(..., description="Path to the folder where the file is located or will be created. It should be a valid directory path.") file_name: str = Field(..., description="Name of the target file (excluding the file extension) where the section will be written or modified.") file_extension: str = Field(..., description="File extension indicating the file type, such as '.txt', '.py', '.md', etc.") section: str = Field(..., description="The specific section within the file to be targeted, such as a class, method, or a uniquely identified section.") body: str = Field(..., description="The actual content to be written into the specified section. It can be code, text, or data in a format compatible with the file type.") request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Read File Model class ReadFileModel(BaseModel): """ A model dedicated to reading file contents in a Large Language Model environment. It enables specifying file location and supports sequential reading tasks through a heartbeat mechanism. """ folder: str = Field(None, description="Path to the folder containing the file.") file_name: str = Field(..., description="The name of the file to be read, including its extension (e.g., 'document.txt').") request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # File List Model class FileListModel(BaseModel): """ A model for listing files within a specified directory in a Large Language Model setup. It also allows for chained file listing operations enabled by a heartbeat feature. """ folder: str = Field(..., description="Path to the directory where files will be listed. This path can include subdirectories to be scanned.") request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") class AddCoreMemoryModel(BaseModel): """ A model designed to add entries to the core memory of a Large Language Model. It facilitates the storage of key-value pairs and supports sequential memory operations with a heartbeat mechanism. """ key: str = Field(..., description="The key identifier for the core memory entry.") field: str = Field(..., description="A secondary key or field within the core memory entry.") value: str = Field(..., description="The value or data to be stored in the specified core memory entry.") request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Replace Core Memory Model class ReplaceCoreMemoryModel(BaseModel): """ A model for replacing specific fields in the core memory of a Large Language Model. It allows updating of existing memory entries and includes a heartbeat feature for chained memory operations. """ key: str = Field(..., description="The key identifier for the core memory entry.") field: str = Field(..., description="The specific field within the core memory entry to be replaced.") new_value: str = Field(..., description="The new value to replace the existing data in the specified core memory field.") request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Remove Core Memory Model class RemoveCoreMemoryModel(BaseModel): """ A model to remove entries or specific fields from the core memory in a Large Language Model. It supports sequential memory modification tasks through a heartbeat mechanism. """ key: str = Field(..., description="The key identifier for the core memory entry to be removed.") field: str = Field(..., description="The specific field within the core memory entry to be removed.") request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Defining the RolesEnum class RolesEnum(str, Enum): EVENT_MEMORY_SEARCH = "Event-Memory-Search" KNOWLEDGE_MEMORY_SEARCH = "Knowledge-Memory-Search" MESSAGE_FROM_DEEP_THOUGHT = "Message-From-Deep-Thought" MESSAGE_FROM_USER = "Message-From-User" SYSTEM_MESSAGE = "System-Message" # Search Event Memory Model class SearchEventMemoryModel(BaseModel): """ A model for searching event memories in a Large Language Model. It allows filtering by event types, date range, and content keywords, with a heartbeat feature for continuous search operations. """ event_types: List[RolesEnum] = Field(..., description="Array of event types to filter the search.") start_date: str = Field(..., description="The starting date for the event search range.") end_date: str = Field(..., description="The ending date for the event search range.") content_keywords: List[str] = Field(..., description="Array of keywords to search within the event content.") request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Search Knowledge Model class SearchKnowledgeModel(BaseModel): """ A model tailored for querying knowledge memories in a Large Language Model framework. It supports extended search operations enabled by a heartbeat mechanism. """ query: str = Field(..., description="The query string to search within the 'Knowledge-Memory'.") request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Connect Knowledge Memories Model class ConnectKnowledgeMemoriesModel(BaseModel): """ A model for connecting various knowledge memories in a Large Language Model. It enables linking different knowledge bases and supports chained operations through a heartbeat feature. """ request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") # Self Reflect Model class SelfReflectModel(BaseModel): """ A model to enable self-reflection capabilities in a Large Language Model. It supports introspective operations and continuous self-analysis through a heartbeat mechanism. """ request_heartbeat: bool = Field(..., description="Set this to true to get control back after execution, to chain functions together.") generate_and_save_gbnf_grammar_and_documentation( [CmdCommandModel, WebBrowsingModel, PythonInterpreterCommandModel, WriteFileSectionModel, ReadFileModel, FileListModel, AddCoreMemoryModel, ReplaceCoreMemoryModel, RemoveCoreMemoryModel, SearchEventMemoryModel, SearchKnowledgeModel, ConnectKnowledgeMemoriesModel, SelfReflectModel])