1"""
2 Usage: python scripts/task_identification_generator.py
3 This script generates examples for a task identification challenge,
4 with the task being to identify patterns between a set of symbols and their resulting labels.
5"""
6
7import json
8import os
9import random
10from typing import Literal
11
12# Ensure consistent results across runs
13random.seed(42)
14
15SYMBOLS = list("abcdefghijklmnopqrstuvwxyz")
16DELIMETER = "->"
17INSTRUCTION = (
18 'Figure out the pattern in the below examples, and then answer with just "foo" or "bar".'
19)
20TASK_NAME = "pattern_identification"
21
22# This function generates an example symbol set and its corresponding label
def generate_example() -> tuple[str, list[str], Literal["foo", "bar"]]:
    """Draw one random (symbol, symbol list, label) example.

    A query symbol is picked from ``SYMBOLS`` and, independently, half of
    ``SYMBOLS`` is sampled without replacement. The label is "foo" when the
    query symbol appears in the sample and "bar" otherwise.

    Returns:
        A tuple of the query symbol, the sampled symbols, and the label.
    """
    # Keep the draw order (choice first, then sample): the module seeds the
    # RNG, so the generated data only stays reproducible if the random
    # stream is consumed in the same sequence.
    query = random.choice(SYMBOLS)
    half = len(SYMBOLS) // 2
    sampled = random.sample(SYMBOLS, half)

    label: Literal["foo", "bar"]
    if query in sampled:
        label = "foo"
    else:
        label = "bar"
    return query, sampled, label
29
30
31# This function generates a string of multiple examples, used to give a user multiple attempts to identify the pattern
def generate_exemplars_str(num_exemplars: int = 8) -> str:
    """Build the few-shot part of a prompt: the instruction plus labelled examples.

    Args:
        num_exemplars: Number of labelled examples to generate.

    Returns:
        ``INSTRUCTION`` followed by one "(symbol, [symbols]) <delimiter> label"
        line per example, all joined with newlines.
    """
    lines = [INSTRUCTION]
    for _ in range(num_exemplars):
        symbol, symbols, label = generate_example()
        rendered = f"({symbol}, {symbols}) {DELIMETER} {label}"
        # The symbol list is formatted through its repr, which quotes every
        # element; drop the quotes so the line reads "(a, [b, c]) -> foo".
        lines.append(rendered.replace("'", ""))
    return "\n".join(lines)
39
40
41# This function generates a set of evaluation examples and their corresponding labels
def generate_eval_examples(
    num_eval_examples: int = 250,
) -> tuple[list[str], list[Literal["foo", "bar"]]]:
    """Generate evaluation prompts together with their expected answers.

    Each prompt is a freshly generated exemplar block followed by one
    unlabelled example that ends at the delimiter, leaving the label to be
    predicted.

    Args:
        num_eval_examples: Number of prompts to generate.

    Returns:
        Two parallel lists: the prompt strings and the matching labels.
    """
    # Draw every held-out example before any exemplar block is generated so
    # the seeded random stream is consumed in the same order as before.
    held_out = [generate_example() for _ in range(num_eval_examples)]

    prompts: list[str] = []
    targets: list[Literal["foo", "bar"]] = []
    for symbol, symbols, label in held_out:
        prompt = f"{generate_exemplars_str()}\n({symbol}, {symbols}) {DELIMETER}"
        # Strip the quotes that the list repr puts around each symbol.
        prompts.append(prompt.replace("'", ""))
        targets.append(label)
    return prompts, targets
52
53
if __name__ == "__main__":
    # Script entry point: generate the samples and write them as JSONL,
    # one chat-style sample per line.
    eval_examples_str, targets = generate_eval_examples()

    # Generate the output path in an OS-agnostic manner
    output_path = os.path.join("evals", "registry", "data", TASK_NAME, "samples.v0.jsonl")

    # Create the task's data directory if it is missing; otherwise open()
    # below fails with FileNotFoundError the first time the script is run.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Pin the encoding so the file does not depend on the platform default.
    with open(output_path, "w", encoding="utf-8") as writer:
        for eval_example_str, target in zip(eval_examples_str, targets):
            d = {
                "input": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": eval_example_str},
                ],
                "ideal": target,
            }
            writer.write(json.dumps(d) + "\n")
    # Report only after the file has been closed (and therefore flushed).
    print(f"{len(eval_examples_str)} lines written to {output_path}.")
71
1"""
2 Usage: python scripts/task_identification_generator.py
3 This script generates examples for a task identification challenge,
4 with the task being to identify patterns between a set of symbols and their resulting labels.
5"""
6
7import json
8import os
9import random
10from typing import Literal
11
12# Ensure consistent results across runs
13random.seed(42)
14
15SYMBOLS = list("abcdefghijklmnopqrstuvwxyz")
16DELIMETER = "->"
17INSTRUCTION = (
18 'Figure out the pattern in the below examples, and then answer with just "foo" or "bar".'
19)
20TASK_NAME = "pattern_identification"
21
22# This function generates an example symbol set and its corresponding label
def generate_example() -> tuple[str, list[str], Literal["foo", "bar"]]:
    """Draw one random (symbol, symbol list, label) example.

    A query symbol is picked from ``SYMBOLS`` and, independently, half of
    ``SYMBOLS`` is sampled without replacement. The label is "foo" when the
    query symbol appears in the sample and "bar" otherwise.

    Returns:
        A tuple of the query symbol, the sampled symbols, and the label.
    """
    # Keep the draw order (choice first, then sample): the module seeds the
    # RNG, so the generated data only stays reproducible if the random
    # stream is consumed in the same sequence.
    query = random.choice(SYMBOLS)
    half = len(SYMBOLS) // 2
    sampled = random.sample(SYMBOLS, half)

    label: Literal["foo", "bar"]
    if query in sampled:
        label = "foo"
    else:
        label = "bar"
    return query, sampled, label
29
30
31# This function generates a string of multiple examples, used to give a user multiple attempts to identify the pattern
def generate_exemplars_str(num_exemplars: int = 8) -> str:
    """Build the few-shot part of a prompt: the instruction plus labelled examples.

    Args:
        num_exemplars: Number of labelled examples to generate.

    Returns:
        ``INSTRUCTION`` followed by one "(symbol, [symbols]) <delimiter> label"
        line per example, all joined with newlines.
    """
    lines = [INSTRUCTION]
    for _ in range(num_exemplars):
        symbol, symbols, label = generate_example()
        rendered = f"({symbol}, {symbols}) {DELIMETER} {label}"
        # The symbol list is formatted through its repr, which quotes every
        # element; drop the quotes so the line reads "(a, [b, c]) -> foo".
        lines.append(rendered.replace("'", ""))
    return "\n".join(lines)
39
40
41# This function generates a set of evaluation examples and their corresponding labels
def generate_eval_examples(
    num_eval_examples: int = 250,
) -> tuple[list[str], list[Literal["foo", "bar"]]]:
    """Generate evaluation prompts together with their expected answers.

    Each prompt is a freshly generated exemplar block followed by one
    unlabelled example that ends at the delimiter, leaving the label to be
    predicted.

    Args:
        num_eval_examples: Number of prompts to generate.

    Returns:
        Two parallel lists: the prompt strings and the matching labels.
    """
    # Draw every held-out example before any exemplar block is generated so
    # the seeded random stream is consumed in the same order as before.
    held_out = [generate_example() for _ in range(num_eval_examples)]

    prompts: list[str] = []
    targets: list[Literal["foo", "bar"]] = []
    for symbol, symbols, label in held_out:
        prompt = f"{generate_exemplars_str()}\n({symbol}, {symbols}) {DELIMETER}"
        # Strip the quotes that the list repr puts around each symbol.
        prompts.append(prompt.replace("'", ""))
        targets.append(label)
    return prompts, targets
52
53
if __name__ == "__main__":
    # Script entry point: generate the samples and write them as JSONL,
    # one chat-style sample per line.
    eval_examples_str, targets = generate_eval_examples()

    # Generate the output path in an OS-agnostic manner
    output_path = os.path.join("evals", "registry", "data", TASK_NAME, "samples.v0.jsonl")

    # Create the task's data directory if it is missing; otherwise open()
    # below fails with FileNotFoundError the first time the script is run.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Pin the encoding so the file does not depend on the platform default.
    with open(output_path, "w", encoding="utf-8") as writer:
        for eval_example_str, target in zip(eval_examples_str, targets):
            d = {
                "input": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": eval_example_str},
                ],
                "ideal": target,
            }
            writer.write(json.dumps(d) + "\n")
    # Report only after the file has been closed (and therefore flushed).
    print(f"{len(eval_examples_str)} lines written to {output_path}.")
71