lukekim / evals

main

/evals/scripts/pattern_identification_generator.py

"""
Usage: python scripts/pattern_identification_generator.py

This script generates examples for a pattern identification challenge,
with the task being to identify the pattern between a set of symbols and
their resulting labels.

Each example is a (target symbol, symbol list) pair. Its label is "foo" when
the target symbol appears in the symbol list and "bar" otherwise.
"""

import json
import os
import random
from typing import Literal

# Ensure consistent results across runs
random.seed(42)

# The two labels an example can carry.
Label = Literal["foo", "bar"]

SYMBOLS = list("abcdefghijklmnopqrstuvwxyz")
# NOTE: the (misspelled) constant name is kept as-is so that existing
# references to it keep working.
DELIMETER = "->"
INSTRUCTION = (
    'Figure out the pattern in the below examples, and then answer with just "foo" or "bar".'
)
TASK_NAME = "pattern_identification"

# Built with os.path.join so the path is valid on any OS. It is relative to
# the current working directory.
DEFAULT_OUTPUT_PATH = os.path.join("evals", "registry", "data", TASK_NAME, "samples.v0.jsonl")


def _format_query(target_symbol: str, symbol_list: list[str]) -> str:
    """Render an example without its label, e.g. ``(a, [b, c]) ->``."""
    symbols = ", ".join(symbol_list)
    return f"({target_symbol}, [{symbols}]) {DELIMETER}"


def generate_example() -> tuple[str, list[str], Label]:
    """Generate one example symbol set and its corresponding label.

    Returns:
        A ``(target_symbol, symbol_list, label)`` tuple. ``symbol_list`` holds
        half of ``SYMBOLS``, sampled without replacement, and ``label`` is
        "foo" if ``target_symbol`` is in ``symbol_list``, else "bar".
    """
    num_symbols = len(SYMBOLS) // 2
    # Draw the target before the list: the order of random calls determines
    # the seeded output.
    target_symbol = random.choice(SYMBOLS)
    symbol_list = random.sample(SYMBOLS, num_symbols)
    target: Label = "foo" if target_symbol in symbol_list else "bar"
    return (target_symbol, symbol_list, target)


def generate_exemplars_str(num_exemplars: int = 8) -> str:
    """Generate the instruction followed by several labelled examples.

    Args:
        num_exemplars: Number of labelled examples to include.

    Returns:
        ``INSTRUCTION`` and one ``(symbol, [symbols]) -> label`` line per
        exemplar, joined by newlines.
    """
    exemplars = [generate_example() for _ in range(num_exemplars)]
    exemplar_lines = [
        f"{_format_query(target_symbol, symbol_list)} {label}"
        for target_symbol, symbol_list, label in exemplars
    ]
    return "\n".join([INSTRUCTION] + exemplar_lines)


def generate_eval_examples(
    num_eval_examples: int = 250,
) -> tuple[list[str], list[Label]]:
    """Generate evaluation prompts and their corresponding labels.

    Args:
        num_eval_examples: Number of prompts to generate.

    Returns:
        A ``(prompts, targets)`` pair of equal-length lists. Each prompt is a
        fresh set of exemplars followed by one unlabelled example, and the
        matching entry of ``targets`` is that example's label.
    """
    # All held-out examples are drawn first and the exemplars afterwards;
    # keeping this order keeps the seeded output stable.
    eval_examples = [generate_example() for _ in range(num_eval_examples)]
    eval_examples_str = [
        f"{generate_exemplars_str()}\n{_format_query(target_symbol, symbol_list)}"
        for target_symbol, symbol_list, _ in eval_examples
    ]
    targets: list[Label] = [label for _, _, label in eval_examples]
    return eval_examples_str, targets


def main(output_path: str = DEFAULT_OUTPUT_PATH, num_eval_examples: int = 250) -> None:
    """Write the generated samples to ``output_path`` as JSON lines.

    Args:
        output_path: Destination file. Missing parent directories are created.
        num_eval_examples: Number of samples to write.
    """
    eval_examples_str, targets = generate_eval_examples(num_eval_examples)

    # Create the data directory if needed so a first run for a new task does
    # not fail with FileNotFoundError. A bare filename has no parent to create.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as writer:
        for eval_example_str, target in zip(eval_examples_str, targets):
            d = {
                "input": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": eval_example_str},
                ],
                "ideal": target,
            }
            writer.write(json.dumps(d) + "\n")
    print(f"{len(eval_examples_str)} lines written to {output_path}.")


if __name__ == "__main__":
    main()