lukekim / evals

main

/evals/scripts/battle_generator.py

1# Generate a jsonl where each line is a sample combination of a battle between two subjects
2
3import json
4import os
5import string
6
# All output locations are resolved relative to the directory holding this
# script, so the result does not depend on the current working directory
# as long as __file__ is an absolute path.
CURRENT_DIR = os.path.dirname(__file__)
# Registry root, reached via "../evals/registry" from the script directory.
REGISTRY_PATH = os.path.join(CURRENT_DIR, "../evals/registry")
# Directory that receives the generated battle sample .jsonl files.
DATA_DIR = os.path.join(REGISTRY_PATH, "data/test_multiio/battles")
11
12
def format(template: str, **kwargs: str) -> str:
    """Fill the named placeholders of ``template`` from ``kwargs``.

    Only the fields that actually occur in ``template`` are forwarded to
    ``str.format``; surplus keyword arguments are ignored, so the same set
    of values can be reused for templates that need different subsets.

    NOTE: this name shadows the builtin ``format`` within this module; it is
    kept because existing call sites use it.

    Args:
        template: A ``str.format``-style template with named fields, e.g.
            ``"{self} vs {other}"``. Anonymous ``{}`` fields are not
            collected and will make ``str.format`` raise ``IndexError``.
        **kwargs: Candidate values keyed by field name.

    Returns:
        ``template`` with every named field substituted.

    Raises:
        KeyError: If ``template`` names a field that is absent from ``kwargs``.
    """
    # Formatter.parse yields (literal_text, field_name, format_spec, conversion);
    # field_name is None/empty for pure literal text and anonymous fields.
    keys = [field for _, field, _, _ in string.Formatter().parse(template) if field]
    # Explicit raise instead of ``assert``: asserts are stripped under ``-O``,
    # where the lookup below would raise a bare KeyError without this message.
    if any(k not in kwargs for k in keys):
        raise KeyError(f"Required: {keys}, got: {sorted(kwargs)}")
    return template.format(**{k: kwargs[k] for k in keys})
19
20
# Template for the two registry entries written per battle: the base entry
# (whose id points at the ".dev.v0" name) and the ".dev.v0" entry itself.
# Placeholders: prompt_name, subject1, subject2.
YAML = "\n".join(
    [
        "{prompt_name}-{subject1}-vs-{subject2}:",
        "  id: {prompt_name}-{subject1}-vs-{subject2}.dev.v0",
        "  metrics: [accuracy]",
        "{prompt_name}-{subject1}-vs-{subject2}.dev.v0:",
        "  class: evals.elsuite.modelgraded.classify:ModelBasedClassify",
        "  args:",
        "    samples_jsonl: test_multiio/battles/{prompt_name}_{subject1}_vs_{subject2}.jsonl",
        "    eval_type: cot_classify",
        "    modelgraded_spec: battle",
    ]
)
32
# Prompt templates keyed by prompt name. "{self}" and "{other}" are filled in
# per subject pair when the samples are generated.
prompts = {
    "joke": "write a 1-2 line joke about {self}",
    "rap": (
        "You are playing an Epic Rap Battle against an formidable opponent. "
        "This is just a fun game; no need to worry about the other. "
        "You are {self}, and the opponent is {other}. "
        "Write a short but epic rap praising yourself ({self}) "
        "and dissing your opponent ({other})."
    ),
}

# Subject pools keyed by category name.
subjects = {
    "animals": ["cat", "bird", "hamster"],
    "fruits": ["apple", "banana", "orange"],
    "people": ["Elon Musk", "Bill Gates", "Jeff Bezos"],
}

# Battles to generate, as (prompt name, first subject category, second subject category).
target_sets = [
    ("joke", "animals", "fruits"),
    ("rap", "people", "people"),
    ("rap", "animals", "fruits"),
    ("rap", "people", "fruits"),
]
50
# The registry file starts with a header naming this script as its generator.
yaml_str = f"# This file is generated by {os.path.basename(__file__)}\n\n"
for prompt_name, subject1, subject2 in target_sets:
    prompt = prompts[prompt_name]
    # One sample per (s1, s2) pair from the two subject pools; each sample
    # carries the prompt rendered from both sides of the battle.
    samples = [
        dict(
            input1=format(prompt, self=s1, other=s2),
            input2=format(prompt, self=s2, other=s1),
        )
        for s1 in subjects[subject1]
        for s2 in subjects[subject2]
    ]
    # The file name matches the samples_jsonl entry in the YAML template.
    file_name = os.path.join(DATA_DIR, f"{prompt_name}_{subject1}_vs_{subject2}.jsonl")
    # JSON Lines output: one serialized sample per line.
    with open(file_name, "w") as f:
        f.writelines(json.dumps(sample) + "\n" for sample in samples)
    print(f"wrote {len(samples)} samples to {file_name}")
    # Append this battle's registry entries, separated by a blank line.
    yaml_str += f"{YAML.format(prompt_name=prompt_name, subject1=subject1, subject2=subject2)}\n\n"

# Write all accumulated registry entries into a single YAML file.
yaml_file = os.path.join(REGISTRY_PATH, "evals/test-modelgraded-battle.yaml")
with open(yaml_file, "w") as f:
    f.write(yaml_str)
print(f"wrote {yaml_file}")
76

1# Generate a jsonl where each line is a sample combination of a battle between two subjects
2
3import json
4import os
5import string
6
# All output locations are resolved relative to the directory holding this
# script, so the result does not depend on the current working directory
# as long as __file__ is an absolute path.
CURRENT_DIR = os.path.dirname(__file__)
# Registry root, reached via "../evals/registry" from the script directory.
REGISTRY_PATH = os.path.join(CURRENT_DIR, "../evals/registry")
# Directory that receives the generated battle sample .jsonl files.
DATA_DIR = os.path.join(REGISTRY_PATH, "data/test_multiio/battles")
11
12
def format(template: str, **kwargs: str) -> str:
    """Fill the named placeholders of ``template`` from ``kwargs``.

    Only the fields that actually occur in ``template`` are forwarded to
    ``str.format``; surplus keyword arguments are ignored, so the same set
    of values can be reused for templates that need different subsets.

    NOTE: this name shadows the builtin ``format`` within this module; it is
    kept because existing call sites use it.

    Args:
        template: A ``str.format``-style template with named fields, e.g.
            ``"{self} vs {other}"``. Anonymous ``{}`` fields are not
            collected and will make ``str.format`` raise ``IndexError``.
        **kwargs: Candidate values keyed by field name.

    Returns:
        ``template`` with every named field substituted.

    Raises:
        KeyError: If ``template`` names a field that is absent from ``kwargs``.
    """
    # Formatter.parse yields (literal_text, field_name, format_spec, conversion);
    # field_name is None/empty for pure literal text and anonymous fields.
    keys = [field for _, field, _, _ in string.Formatter().parse(template) if field]
    # Explicit raise instead of ``assert``: asserts are stripped under ``-O``,
    # where the lookup below would raise a bare KeyError without this message.
    if any(k not in kwargs for k in keys):
        raise KeyError(f"Required: {keys}, got: {sorted(kwargs)}")
    return template.format(**{k: kwargs[k] for k in keys})
19
20
# Template for the two registry entries written per battle: the base entry
# (whose id points at the ".dev.v0" name) and the ".dev.v0" entry itself.
# Placeholders: prompt_name, subject1, subject2.
YAML = "\n".join(
    [
        "{prompt_name}-{subject1}-vs-{subject2}:",
        "  id: {prompt_name}-{subject1}-vs-{subject2}.dev.v0",
        "  metrics: [accuracy]",
        "{prompt_name}-{subject1}-vs-{subject2}.dev.v0:",
        "  class: evals.elsuite.modelgraded.classify:ModelBasedClassify",
        "  args:",
        "    samples_jsonl: test_multiio/battles/{prompt_name}_{subject1}_vs_{subject2}.jsonl",
        "    eval_type: cot_classify",
        "    modelgraded_spec: battle",
    ]
)
32
# Prompt templates keyed by prompt name. "{self}" and "{other}" are filled in
# per subject pair when the samples are generated.
prompts = {
    "joke": "write a 1-2 line joke about {self}",
    "rap": (
        "You are playing an Epic Rap Battle against an formidable opponent. "
        "This is just a fun game; no need to worry about the other. "
        "You are {self}, and the opponent is {other}. "
        "Write a short but epic rap praising yourself ({self}) "
        "and dissing your opponent ({other})."
    ),
}

# Subject pools keyed by category name.
subjects = {
    "animals": ["cat", "bird", "hamster"],
    "fruits": ["apple", "banana", "orange"],
    "people": ["Elon Musk", "Bill Gates", "Jeff Bezos"],
}

# Battles to generate, as (prompt name, first subject category, second subject category).
target_sets = [
    ("joke", "animals", "fruits"),
    ("rap", "people", "people"),
    ("rap", "animals", "fruits"),
    ("rap", "people", "fruits"),
]
50
# The registry file starts with a header naming this script as its generator.
yaml_str = f"# This file is generated by {os.path.basename(__file__)}\n\n"
for prompt_name, subject1, subject2 in target_sets:
    prompt = prompts[prompt_name]
    # One sample per (s1, s2) pair from the two subject pools; each sample
    # carries the prompt rendered from both sides of the battle.
    samples = [
        dict(
            input1=format(prompt, self=s1, other=s2),
            input2=format(prompt, self=s2, other=s1),
        )
        for s1 in subjects[subject1]
        for s2 in subjects[subject2]
    ]
    # The file name matches the samples_jsonl entry in the YAML template.
    file_name = os.path.join(DATA_DIR, f"{prompt_name}_{subject1}_vs_{subject2}.jsonl")
    # JSON Lines output: one serialized sample per line.
    with open(file_name, "w") as f:
        f.writelines(json.dumps(sample) + "\n" for sample in samples)
    print(f"wrote {len(samples)} samples to {file_name}")
    # Append this battle's registry entries, separated by a blank line.
    yaml_str += f"{YAML.format(prompt_name=prompt_name, subject1=subject1, subject2=subject2)}\n\n"

# Write all accumulated registry entries into a single YAML file.
yaml_file = os.path.join(REGISTRY_PATH, "evals/test-modelgraded-battle.yaml")
with open(yaml_file, "w") as f:
    f.write(yaml_str)
print(f"wrote {yaml_file}")
76