main

/evals/scripts/modelgraded_generator.py

"""Generate model-graded eval samples (JSONL) and their registry YAML.

For every ``(prompt_name, subject)`` pair in ``unlabeled_target_sets`` this
script writes ``<prompt_name>_<subject>.jsonl`` under
``REGISTRY_PATH/data/test_modelgraded`` and appends a matching entry to
``REGISTRY_PATH/evals/test-modelgraded-generated.yaml``.
"""
import json
import os
import string

# Registry root, resolved relative to this script's own location.
REGISTRY_PATH = os.path.join(os.path.dirname(__file__), "../evals/registry")


def format(template: str, **kwargs: str) -> str:
    """Fill ``template`` using only the fields it actually names.

    Args:
        template: A ``str.format``-style template.
        **kwargs: Candidate values. Keys the template does not reference are
            ignored rather than passed through.

    Returns:
        The formatted string.

    Raises:
        AssertionError: If the template names a field missing from ``kwargs``.
    """
    # Formatter.parse yields (literal, field_name, spec, conversion) tuples;
    # field_name is None/"" for pure-literal chunks, which are skipped here.
    keys = [k[1] for k in string.Formatter().parse(template) if k[1]]
    assert all(k in kwargs for k in keys), f"Required: {keys}, got: {sorted(kwargs)}"
    cur_keys = {k: kwargs[k] for k in keys}
    return template.format(**cur_keys)


# Registry entry template; one copy is emitted per (prompt_name, subject) pair.
YAML = """
mg-{prompt_name}-{subject}:
  id: mg-{prompt_name}-{subject}.dev.v0
  metrics: [accuracy]
mg-{prompt_name}-{subject}.dev.v0:
  class: evals.elsuite.modelgraded.classify:ModelBasedClassify
  args:
    samples_jsonl: test_modelgraded/{prompt_name}_{subject}.jsonl
    eval_type: {eval_type}
    modelgraded_spec: {modelgraded_spec}
""".strip()

# Prompt definitions, keyed by prompt name.
unlabeled_prompts = dict(
    humor=dict(
        modelgraded_spec="humor_jp",
        prompt="{subject}で数行の大喜利を書いて。敬語じゃなく関西弁で吉本のM1チャンピョンみたいに書いて。",
        eval_type="cot_classify_jp",
    ),
)


# Subject lists, keyed by subject-set name. The literals may repeat entries;
# duplicates are dropped right after the definition.
subjects = dict(
    animals=["cat", "bird", "hamster"],
    fruits=["apple", "banana", "orange"],
    people=["Elon Musk", "Bill Gates", "Jeff Bezos"],
    people_jp=[
        "イーロン・マスク",
        "ビル・ゲイツ",
        "ジェフ・ベゾス",
        "アルベルト・アインシュタイン",
        "ニコラ・テスラ",
        "レオナルド・ダ・ヴィンチ",
        "マハトマ・ガンジー",
        "ウィリアム・シェイクスピア",
        "スティーブ・ジョブズ",
        "ビル・ゲイツ",
        "マイケル・ジャクソン",
        "マダム・キュリー",
        "ジョン・F・ケネディ",
        "マーティン・ルーサー・キング・ジュニア",
        "ネルソン・マンデラ",
        "プラトン",
        "アリストテレス",
        "フィリップ・コッタウェイ",
        "ニール・アームストロング",
        "レオ・トルストイ",
        "マルコ・ポーロ",
        "ウィリアム・テル",
        "モーツァルト",
        "ベートーベン",
        "ショパン",
        "ダンテ・アリギエーリ",
        "フランツ・カフカ",
        "ガリレオ・ガリレイ",
        "アイザック・ニュートン",
        "チャールズ・ダーウィン",
        "フリードリヒ・ニーチェ",
        "シェイクスピア",
        "オスカー・ワイルド",
        "アーサー・コナン・ドイル",
        "アガサ・クリスティ",
        "J・K・ローリング",
        "トルーマン・カポーティ",
        "アルフレッド・ヒッチコック",
        "ウォルト・ディズニー",
        "アンディ・ウォーホル",
        "ピカソ",
        "ミケランジェロ",
        "レオナルド・フィボナッチ",
        "アルキメデス",
        "マルコム・X",
        "ジョージ・ワシントン",
        "エイブラハム・リンカーン",
        "フランクリン・D・ルーズベルト",
        "ワシントン・アーヴィング",
        "マーク・トウェイン",
        "フィリップ・K・ディック",
        "ジョージ・オーウェル",
        "トーマス・モア",
        "ハンス・クリスチャン・アンデルセン",
        "グリム兄弟",
        "アレクサンドル・デュマ",
        "ビクトル・ユーゴー",
        "エミール・ゾラ",
        "フランツ・シューベルト",
        "ゲオルク・フリードリヒ・ヘンデル",
        "ヨハン・セバスチャン・バッハ",
        "ルートヴィヒ・ヴァン・ベートーヴェン",
        "ヨハネス・ブラームス",
        "ロベルト・シューマン",
        "ヨハン・シュトラウス2世",
        "イーロン・マスク",
        "スティーブン・ホーキング",
        "リチャード・ファインマン",
        "アラン・チューリング",
        "ニール・デグラス・タイソン",
        "マイケル・ファラデー",
        "スティーブン・スピルバーグ",
        "クリストファー・ノーラン",
        "スタン・リー",
        "ジョージ・ルーカス",
        "ウィリアム・ゴールディング",
        "ジョージ・オーウェル",
        "エルンスト・ヘッケル",
        "ルイ・パスツール",
        "カール・セーガン",
        "アンリ・ベルクソン",
        "ミハイル・バクーニン",
        "ハンス・モルゲンソー",
        "アンドレ・マルロー",
        "シモーヌ・ド・ボーヴォワール",
        "ベルトルト・ブレヒト",
        "ジャン＝ポール・サルトル",
        "フリードリヒ・ヘーゲル",
        "マックス・ウェーバー",
        "マルクス・アウレリウス",
        "レフ・トルストイ",
        "アントン・チェーホフ",
        "フョードル・ドストエフスキー",
        "トルストイ",
        "ウィリアム・フォークナー",
        "エルネスト・ヘミングウェイ",
        "アーサー・ミラー",
        "テネシー・ウィリアムズ",
        "サミュエル・ベケット",
        "ハロルド・ピンター",
        "フランツ・カフカ",
        "ジョージ・バーナード・ショー",
        "ウィリアム・ゴールディング",
        "ジャック・ケルアック",
        "エドガー・アラン・ポー",
        "ハーマン・メルヴィル",
        "ジョセフ・コンラッド",
        "アーサー・コナン・ドイル",
        "ジョン・スタインベック",
        "ジェームズ・ジョイス",
        "バージニア・ウルフ",
        "トマス・マン",
        "フランツ・カフカ",
        "ヘルマン・ヘッセ",
        "ゲオルク・ヴィルヘルム・フリードリヒ・ヘーゲル",
        "エマニュエル・カント",
        "ジャン＝ジャック・ルソー",
        "ジョン・ロック",
        "トマス・ホッブズ",
        "ジョン・デューイ",
        "ジョン・スチュアート・ミル",
        "ニコロ・マキャヴェッリ",
        "モンテスキュー",
        "ルソー",
        "プラトン",
        "アリストテレス",
        "サー・アイザック・ニュートン",
    ],
)
# Remove duplicates while keeping first-occurrence order. A set() would order
# the strings by their (per-process randomized) hash, so the generated JSONL
# would be reshuffled on every run; dict keys preserve insertion order.
subjects = {k: list(dict.fromkeys(v)) for k, v in subjects.items()}

# (prompt_name, subject) pairs to generate samples for.
unlabeled_target_sets = [
    ("humor", "people_jp"),
]

# Directory that receives the generated sample files.
data_dir = os.path.join(REGISTRY_PATH, "data/test_modelgraded")


def main() -> None:
    """Write one JSONL sample file per target set, then the registry YAML.

    Output directories are created when missing. Progress and the resulting
    ``eval name: sample file`` pairs are printed to stdout.
    """
    os.makedirs(data_dir, exist_ok=True)
    yaml_parts = [f"# This file is generated by {os.path.basename(__file__)}\n\n"]
    evals = []

    for prompt_name, subject in unlabeled_target_sets:
        spec = unlabeled_prompts[prompt_name]
        samples = [
            {"input": format(spec["prompt"], subject=s)} for s in subjects[subject]
        ]
        file_name = os.path.join(data_dir, f"{prompt_name}_{subject}.jsonl")
        # Binary mode keeps the output UTF-8 with "\n" line endings on every
        # platform; ensure_ascii=False leaves the Japanese text human-readable.
        with open(file_name, "wb") as f:
            for sample in samples:
                json_data = json.dumps(sample, ensure_ascii=False)
                f.write(json_data.encode("utf-8"))
                f.write(b"\n")
        print(f"wrote {len(samples)} samples to {file_name}")
        yaml_parts.append(
            YAML.format(
                prompt_name=prompt_name,
                subject=subject,
                modelgraded_spec=spec["modelgraded_spec"],
                eval_type=spec["eval_type"],
            )
            + "\n\n"
        )
        evals.append(f"mg-{prompt_name}-{subject}: {file_name}")

    # Write the YAML file with an explicit encoding instead of the locale default.
    yaml_file = os.path.join(REGISTRY_PATH, "evals/test-modelgraded-generated.yaml")
    os.makedirs(os.path.dirname(yaml_file), exist_ok=True)
    with open(yaml_file, "w", encoding="utf-8") as yf:
        yf.write("".join(yaml_parts))
    print(f"wrote {yaml_file}")

    # Print the evals
    for e in evals:
        print(e)


if __name__ == "__main__":
    main()