/evals/spicepod.yaml
1name: spicepod  # name given to this Spicepod
2version: v1beta1  # manifest version string (presumably the Spicepod spec version; confirm against the Spice.ai docs)
3kind: Spicepod  # resource kind declared by this manifest
4
5# This file is generated from the evals & data in the repo using [evalconverter](https://github.com/spiceai/spiceai/tree/trunk/tools/evalconverter):
6# ```shell
7# evalconverter -i evals/registry/evals -b evals/registry/data
8# ```
9
10datasets:
11 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/actors-sequence/samples.jsonl
12 # name: actors_sequence__dev__match_v1
13 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/adultery-state-laws/samples.jsonl
14 # name: adultery_state_laws__dev__v0
15 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/proofreader/samples.jsonl
16 # name: proofreader__dev__v0
17 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rock-climbing/samples.jsonl
18 # name: rock_climbing__dev__v0
19 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/banking77/samples.jsonl
20 # name: match_banking77__test__v1
21 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_prep.jsonl
22 # name: ukraine_gec_grammar_prep__dev__v0
23 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_case.jsonl
24 # name: ukraine_gec_grammar_case__dev__v0
25 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_gender.jsonl
26 # name: ukraine_gec_grammar_gender__dev__v0
27 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_partvoice.jsonl
28 # name: ukraine_gec_grammar_partvoice__dev__v0
29 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_poorflow.jsonl
30 # name: ukraine_gec_fluency_poorflow__dev__v0
31 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_verbvoice.jsonl
32 # name: ukraine_gec_grammar_verbvoice__dev__v0
33 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_number.jsonl
34 # name: ukraine_gec_grammar_number__dev__v0
35 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_repetition.jsonl
36 # name: ukraine_gec_fluency_repetition__dev__v0
37 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_calque.jsonl
38 # name: ukraine_gec_fluency_calque__dev__v0
39 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_verbaform.jsonl
40 # name: ukraine_gec_grammar_verbaform__dev__v0
41 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_ungrammaticalstructure.jsonl
42 # name: ukraine_gec_grammar_ungrammaticalstructure__dev__v0
43 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_other.jsonl
44 # name: ukraine_gec_grammar_other__dev__v0
45 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_style.jsonl
46 # name: ukraine_gec_fluency_style__dev__v0
47 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_other.jsonl
48 # name: ukraine_gec_fluency_other__dev__v0
49 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_conjunction.jsonl
50 # name: ukraine_gec_grammar_conjunction__dev__v0
51 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_comparison.jsonl
52 # name: ukraine_gec_grammar_comparison__dev__v0
53 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_tense.jsonl
54 # name: ukraine_gec_grammar_tense__dev__v0
55 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_aspect.jsonl
56 # name: ukraine_gec_grammar_aspect__dev__v0
57 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/irish_plural_nouns/samples.jsonl
58 # name: irish_plural_nouns__dev__v0
59 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/shape_in_shape/shape_in_shape.jsonl
60 # name: shape_in_shape__dev__v1
61 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian_sarcasm/samples.jsonl
62 # name: russian_sarcasm__dev__v0
63 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/syllables_long_words/long_word_samples.jsonl
64 # name: syllables__dev__v1
65 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/crepe/samples.jsonl
66 # name: crepe__dev__v2
67 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/coq-proof-step/match.jsonl
68 # name: coq_proof_step_match__dev__v0
69 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_eit/samples.jsonl
70 # name: ukraine_eit__val__v0
71 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_proverbs/samples.jsonl
72 # name: belarusian_proverbs__dev__v0
73 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/invoices/match.jsonl
74 # name: invoices__dev__v0
75 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/urdu-lexicon/samples.jsonl
76 # name: urdu_lexicon__dev__v0
77 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/qa/q_and_a.jsonl
78 # name: qa__dev__v0
79 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/french-part-of-speech/samples.jsonl
80 # name: french_part_of_speech__dev__v0
81 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/internal_representations/samples.jsonl
82 # name: internal_representations__dev__v0
83 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/python_list_comprehension/samples.jsonl
84 # name: python_list_comprehension__dev__v0
85 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/nepali_numerals/samples.jsonl
86 # name: nepali_numerals__dev__v0
87 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_syllable_count/samples.jsonl
88 # name: belarusian_syllable_count__dev__v0
89 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/mandaliof-table/samples.jsonl
90 # name: mandaliof_table__dev__v0
91 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/test_japanese_english_numerals/samples.jsonl
92 # name: test_japanese_english_numerals__dev__v0
93 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/tracking-shuffled-objects/samples.jsonl
94 # name: tracking_shuffled_objects__dev__v0
95 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/squares-gpt/square-samples.jsonl
96 # name: squares_gpt__dev__v0
97 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/convert-hex-hsl-lightness/samples.jsonl
98 # name: convert_hex_hsl_lightness__dev__v0
99 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russe/samples.jsonl
100 # name: russe__test__v0
101 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/aba_mrpc_true_false/samples.jsonl
102 # name: aba_mrpc_true_false__dev__v0
103 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logical_counting/samples.jsonl
104 # name: logical_counting__dev__v0
105 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/vigenere/samples.jsonl
106 # name: vigenere__s1__simple_v0
107 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/map-electronic-component-part-to-fact/samples.jsonl
108 # name: map_electronic_component_part_to_fact__dev__v0
109 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rare-and-loanwords-dutch-lexicon/samples.jsonl
110 # name: rare_and_loanwords_dutch_lexicon__dev__v0
111 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-ie/fewshot/product_ie_one_shot_samples.jsonl
112 # name: product_information_extraction_one_shot__dev__v0
113 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-ie/zeroshot/product_ie_zero_shot_samples.jsonl
114 # name: product_information_extraction_zero_shot__dev__v0
115 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/sort_numeric/samples.jsonl
116 # name: sort_numbers__s1__simple_v0
117 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-matching/zeroshot/samples.jsonl
118 # name: match_product_matching_zeroshot__dev__v1
119 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-matching/fewshot/samples.jsonl
120 # name: match_product_matching_fewshot__dev__v1
121 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-matching/rules/samples.jsonl
122 # name: match_product_matching_rules__dev__v1
123 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian-lexicon/samples.jsonl
124 # name: russian_lexicon__dev__v0
125 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/dutch-lexicon/samples.jsonl
126 # name: dutch_lexicon__dev__v0
127 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/greek_nt_manuscripts/codes-sigla-centuries.jsonl
128 # name: greek_nt_manuscripts__v0
129 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/matrix_mult_rows/samples.jsonl
130 # name: matrix_mult_rows__dev__v0
131 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/moral_exceptQA/samples.jsonl
132 # name: moral_exceptqa__test__v1
133 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/music-theory/triads-samples.jsonl
134 # name: music_theory_triads_identification__dev__v0
135 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/music-theory/tetrads-samples.jsonl
136 # name: music_theory_tetrads_identification__dev__v0
137 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/find-thirukkural/samples.jsonl
138 # name: find_thirukkural__dev__v0
139 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/building_floorplan/samples.jsonl
140 # name: building_floorplan__test__v1
141 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese-national-medical-exam01/japanese-national-medical-exam01.jsonl
142 # name: japanese_national_medical_exam01__dev__v0
143 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/lat_long_identify/samples.jsonl
144 # name: lat_long_identify__dev__v0
145 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/norwegian-lexicon/samples.jsonl
146 # name: norwegian_lexicon__dev__v0
147 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/german-part-of-speech/samples.jsonl
148 # name: german_part_of_speech__dev__v0
149 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/swedish_sat/samples.jsonl
150 # name: swedish_sat__dev__v0
151 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/utility_price_parsing/samples.jsonl
152 # name: utility_price_parsing__dev__v0
153 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean-consonant-vowel-combination/samples.jsonl
154 # name: korean_consonant_vowel_combination__dev__v0
155 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/mate-in-one/samples.jsonl
156 # name: mate_in_one__dev__v0
157 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/french-lexicon/samples.jsonl
158 # name: french_lexicon__dev__v0
159 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/swedish-spelling/samples.jsonl
160 # name: swedish_spelling__dev__v0
161 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/knot-theory/knot-theory-unknotting-numbers.jsonl
162 # name: knot_theory_unknotting_number__dev__v0
163 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/knot-theory/knot-theory-unknotting-problems.jsonl
164 # name: knot_theory_unknotting_problem__dev__v0
165 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/knot-theory/knot-theory-code-conversions.jsonl
166 # name: knot_theory_code_conversion__dev__v0
167 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hindi_words/samples.jsonl
168 # name: hindi_words__dev__v0
169 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/arithmetical_puzzles/arithmetical_puzzles.jsonl
170 # name: arithmetical_puzzles__dev__v0
171 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_antonyms/samples.jsonl
172 # name: belarusian_antonyms__dev__v0
173 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/body_movement/body_movement.jsonl
174 # name: body_movement__dev__zero_shot_v0
175 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/afrikaans-lexicon/samples.jsonl
176 # name: afrikaans_lexicon__dev__v0
177 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cricket_situations/samples.jsonl
178 # name: cricket_situations__dev__v0
179 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean_spelling/samples.jsonl
180 # name: korean_spelling__dev__v0
181 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rucola/samples.jsonl
182 # name: rucola__test__v0
183 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logiqa-logical-reasoning-plus/reclor-logical-reasoning-plus.jsonl
184 # name: reclor_logical_reasoning_plus__dev__v0
185 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logiqa-logical-reasoning-plus/logiqav2-logical-reasoning-plus.jsonl
186 # name: logiqav2_logical_reasoning_plus__dev__v0
187 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logiqa-logical-reasoning-plus/logiqa-logical-reasoning-plus.jsonl
188 # name: logiqa_logical_reasoning_plus__dev__v0
189 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/medmcqa/samples.jsonl
190 # name: medmcqa__dev__v0
191 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/multi-step-equations/samples.jsonl
192 # name: multi_step_equations__dev__v0
193 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/islands/japanese_remote_island_to_prefecture.jsonl
194 # name: japanese_remote_island_to_prefecture__dev__v0
195 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_homonym/samples.jsonl
196 # name: chinese_homonym__dev__v0
197 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/norwegian-rhymes/samples.jsonl
198 # name: norwegian_rhymes__dev__v0
199 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_shi_jing/samples.jsonl
200 # name: chinese_shi_jing__test__v1
201 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/forth_stack_sim/basic_samples.jsonl
202 # name: forth_stack_sim_basic__dev__v0
203 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/forth_stack_sim/samples.jsonl
204 # name: forth_stack_sim__dev__v0
205 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/forth_stack_sim/detailed_samples.jsonl
206 # name: forth_stack_sim_detailed__dev__v0
207 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese_city_name_pronunciation/samples.jsonl
208 # name: japanese_city_name_pronunciation__dev__v0
209 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/escher_sentences/samples.jsonl
210 # name: escher_sentences__dev__v0
211 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/track_objects/samples.jsonl
212 # name: track_objects__dev__v0
213 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/shopping_discount_comparison/samples.jsonl
214 # name: shopping_discount_comparison__dev__v0
215 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/test_comp_sci/questions.jsonl
216 # name: computer_science_problems__s1__simple_v0
217 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/mendelian_inheritance/samples.jsonl
218 # name: mendelian_inheritance__dev__v0
219 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/override-system-instruction/samples.jsonl
220 # name: override_system_instruction__dev__v0
221 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/poker_hand_ranks/full_samples.jsonl
222 # name: hand_ranks__test__v1
223 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/diabetes/samples.jsonl
224 # name: diabetes__dev__v0
225 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/job_listing_title_for_a_caregiver_in_japan/samples.jsonl
226 # name: job_listing_title_for_a_caregiver_in_japan__test__v1
227 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/poker_analysis/samples.jsonl
228 # name: poker_analysis__test__v1
229 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_numerals/samples.jsonl
230 # name: belarusian_numerals__dev__v0
231 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/algebra_word_problems/samples.jsonl
232 # name: algebra_word_problems__s1__simple_v0
233 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_grammar/samples.jsonl
234 # name: belarusian_grammar__dev__v0
235 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/svg_understanding/samples.jsonl
236 # name: svg_understanding__v0
237 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cissp-study-questions/samples.jsonl
238 # name: cissp_study_questions__test__v1
239 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/linear_equations/samples.jsonl
240 # name: linear_equations__dev__v0
241 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese_driving_license/samples.jsonl
242 # name: japanese_driving_license__s1__simple_v0
243 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/first-letters/samples.jsonl
244 # name: first_letters__dev__v0
245 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/arc/samples.jsonl
246 # name: arc__dev__v0
247 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/css-selectors/verbal.jsonl
248 # name: css_selectors_verbal__dev__v0
249 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese-itpassport-exam01/japanese-itpassport-exam01.jsonl
250 # name: japanese_itpassport_exam01__dev__v0
251 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logiqa/logiqa.jsonl
252 # name: logiqa__dev__v0
253 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_zodiac/samples.jsonl
254 # name: chinese_zodiac__dev__v0
255 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/spanish-lexicon/samples.jsonl
256 # name: spanish_lexicon__dev__v0
257 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/food/samples.jsonl
258 # name: food__test__v1
259 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/countries/samples.jsonl
260 # name: countries__dev__v0
261 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/which_is_heavier/which_is_heavier.jsonl
262 # name: which_is_heavier__dev__v0
263 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean_date_counting/samples.jsonl
264 # name: korean_date_counting__dev__v0
265 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/fcc_amateur_extra/samples.jsonl
266 # name: fcc_amateur_extra__dev__v0
267 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/multistep-word-problems/samples.jsonl
268 # name: multistep_word_problems__dev__v0
269 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/list_comparison_missing_name/samples.jsonl
270 # name: list_comparison_missing_name__dev__v0
271 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/newsology/samples.jsonl
272 # name: newsology__dev__v0
273 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/simple-visual-understanding/simple-visual-understanding.jsonl
274 # name: simple_visual_understanding__dev__v0
275 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/portuguese-syllable-count/samples.jsonl
276 # name: portuguese_syllable_count__dev__v0
277 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/south-african-bands/south-african-bands.jsonl
278 # name: south_african_bands__dev__v0
279 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hebrew_plurals/samples.jsonl
280 # name: hebrew_plurals__dev__v0
281 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rot13/rot13.jsonl
282 # name: rot13__s1__simple_v0
283 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean_dialects/samples.jsonl
284 # name: korean_dialects__dev__v0
285 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/test_time_zone_conversion/samples.v0.jsonl
286 # name: test_time_zone_conversion__dev__v0
287 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/music_theory/music_theory_chord_notes.jsonl
288 # name: music_theory_chord_notes__dev__v0
289 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian-english-homonym-context-resolution/samples.jsonl
290 # name: russian_english_homonym_context_resolution__dev__v0
291 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/number_reading/number_reading.jsonl
292 # name: number_reading__dev__v0
293 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/simple-knowledge-mongolian/samples.v0.jsonl
294 # name: simple_knowledge_mongolian__dev__v0
295 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/base64_decode/base64_decode.jsonl
296 # name: base64_decode_simple__dev__v0
297 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/urdu-transliteration/samples.jsonl
298 # name: urdu_transliteration__dev__v0
299 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/reverse-polish-notation/questions.jsonl
300 # name: reverse_polish_notation__dev__v0
301 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/music_theory/music_theory_chord_names.jsonl
302 # name: music_theory_chord_names__dev__v0
303 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/born_first/born_first.jsonl
304 # name: born_first__dev__v0
305 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/tetris/tetris.jsonl
306 # name: tetris__dev__v0
307 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/pure_korean/samples.jsonl
308 # name: pure_korean__dev__v0
309 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/determinant/samples.jsonl
310 # name: determinant__test__v1
311 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/split_chinese_characters/samples.jsonl
312 # name: split_chinese_characters__dev__v0
313 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/syntax-check/samples.jsonl
314 # name: syntax_check__dev__v1
315 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/balance_chemical_equation/samples.jsonl
316 # name: balance_chemical_equation__dev__v0
317 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/emotional-intelligence/samples.jsonl
318 # name: emotional_intelligence__dev__v0
319 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/nutrition/facts.jsonl
320 # name: nutrition__dev__v0
321 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/reverse-sort-words-eng/samples.jsonl
322 # name: reverse_sort_words_eng_simple__dev__v0
323 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/day-of-week-from-date/samples.jsonl
324 # name: day_of_week_from_date__dev__v0
325 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/regex-match/samples.jsonl
326 # name: regex__match__dev__v0
327 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/find-letter/samples.jsonl
328 # name: find_letter__dev__v0
329 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean_foreign_words/samples.jsonl
330 # name: korean_foreign_words__dev__v0
331 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/greek_vocabulary/samples.jsonl
332 # name: greek_vocabulary__dev__v0
333 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rubiks-colors/samples.jsonl
334 # name: rubiks_colors__dev__v0
335 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/decrypt_caesar_cipher/samples.jsonl
336 # name: decrypt_caesar_cipher__dev__v0
337 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/us_tort_law/samples.jsonl
338 # name: us_tort_law__dev__v0
339 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/number_pattern/samples.jsonl
340 # name: number_pattern__dev__v0
341 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/confusing_korean/samples.jsonl
342 # name: confusing_korean__dev__v0
343 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/kanji-idioms/samples.jsonl
344 # name: kanji_idioms__test__v0
345 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/missing_operators/samples.jsonl
346 # name: missing_operators__s1__simple_v0
347 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/unsolvable_questions/samples.jsonl
348 # name: unsolvable_questions__dev__v0
349 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/portuguese-sarcasm/samples.jsonl
350 # name: portuguese_sarcasm__dev__v0
351 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/swap-words/samples.jsonl
352 # name: swap_words__dev__v0
353 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hebrew_same_noun_gender/samples.jsonl
354 # name: hebrew_same_noun_gender__v0
355 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/heart-disease/samples.jsonl
356 # name: heart_disease__v0
357 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/last_word_nth/samples.jsonl
358 # name: last_word_nth__s1__simple_v0
359 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ascii_wordart/ascii_wordart.jsonl
360 # name: ascii_wordart__dev__v0
361 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/direct-speech-tag/samples.jsonl
362 # name: direct_speech_tag__dev__v0
363 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/italian-new-words/samples.jsonl
364 # name: italian_new_words__dev__v0
365 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/irony/samples.jsonl
366 # name: irony__dev__v0
367 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/math_polish/samples.jsonl
368 # name: math_polish__dev__v0
369 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/irish-lexicon/samples.jsonl
370 # name: irish_lexicon__dev__v0
371 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/canto_wu_pronunciation/samples_zero.jsonl
372 # name: canto_wu_pronunciation__dev__v0
373 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/irrelevant-negative-diversion/irrelevant-negative-diversion.jsonl
374 # name: irrelevant_negative_diversion__dev__v0
375 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/invert_word_wise/invert.jsonl
376 # name: invert_word_wise__dev__v0
377 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/imperial_date_to_string/samples.jsonl
378 # name: imperial_date_to_string__dev__v0
379 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/gujarati_numerals/samples.jsonl
380 # name: gujarati_numerals__dev__v0
381 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/count_token_freq_dna/samples.jsonl
382 # name: count_token_freq_dna__dev__v0
383 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/french_homonym_and_homograph/samples.jsonl
384 # name: french_homonym_and_homograph__dev__v0
385 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cube-pack/samples.jsonl
386 name: cube_pack__dev__v0
387 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/historical-kana-orthography-reading/samples.jsonl
388 name: historical_kana_orthography_reading__dev__v0
389 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/canto_wu_pronunciation/samples_few.jsonl
390 name: canto_wu_pronunciation_fewshot__dev__v0
391 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/accounting_audit/samples.jsonl
392 name: accounting_audit__dev__v0
393 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/brazilian-lexicon/samples.jsonl
394 name: brazilian_lexicon__dev__v0
395 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/naughty_strings/samples.jsonl
396 name: naughty_strings__test__v1
397 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean-phonetics/samples.jsonl
398 name: korean_phonetics__dev__v0
399 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_homophonic/chinese_homophonic.jsonl
400 name: chinese_homophonic__dev__v0
401 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/count_intersections_polynomial/samples.jsonl
402 name: count_intersections_polynomial__dev__v0
403 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/coqa/match.jsonl
404 name: coqa_match__dev__v0
405 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/latin_grammar/samples.jsonl
406 name: latin_grammar__dev__v0
407 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/bitwise/samples.jsonl
408 name: bitwise__dev__v0
409 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/shared_border/samples.jsonl
410 name: shared_borders__dev__v0
411 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese-station/samples.jsonl
412 name: japanese_station__dev__v0
413 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/atpl_exams/samples.jsonl
414 name: atpl_exams__dev__v0
415 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/invoice_due_date_leap_day_adjustment/samples.jsonl
416 name: invoice_due_date_leap_day_adjustment__dev__v0
417 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/romanian_homonyms/samples.jsonl
418 name: romanian_homonyms__dev__v0
419 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/infiniteloop-match/infiniteloop-match.jsonl
420 name: infiniteloop_match__s1__simple_v0
421 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian-nlp-tasks/samples.jsonl
422 name: russian_nlp_tasks__dev__v0
423 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_chu_ci/samples.jsonl
424 name: chinese_chu_ci__dev__v0
425 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/polish-syllable-count/samples.jsonl
426 name: polish_syllable_count__val__v0
427 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean-postposition/samples.jsonl
428 name: korean_postposition__dev__v0
429 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/bulgarian-lexicon/samples.jsonl
430 name: bulgarian_lexicon__dev__v0
431 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/compare-countries-area/samples.jsonl
432 name: compare_countries_area__dev__v0
433 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/pattern_identification/samples.v0.jsonl
434 name: pattern_identification__dev__v0
435 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_synonyms/samples.jsonl
436 name: belarusian_synonyms__dev__v0
437 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/spanish_feminine_noun_masculine_article/samples.jsonl
438 name: spanish_feminine_noun_masculine_article__dev__v0
439 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/sarcasm/samples.jsonl
440 name: sarcasm__test__v1
441 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_tang_poetries/sample.jsonl
442 name: chinese_tang_poetries__dev__match_v1
443 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese_number_reading/japanese_number_reading.jsonl
444 name: japanese_number_reading__dev__v0
445 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean-honorific/samples.jsonl
446 name: korean_honorific__dev__v0
447 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/complex_replace_characters/samples.jsonl
448 name: complex_replace_characters__dev__v0
449 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/dice-rotation-sequence/samples.jsonl
450 name: dice_rotation_sequence__dev__v0
451 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/utah_real_estate/samples.jsonl
452 name: utah_real_estate__dev__v0
453 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/formal_logic/formal_logic_expressions.jsonl
454 name: formal_logic__dev__v0
455 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/resistor_ohm_calculator/samples.jsonl
456 name: resistor_ohm_calculator__dev__simple_v0
457 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/GOL/samples.jsonl
458 name: gol__dev__v1
459 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/icelandic-sentences-gec/samples.jsonl
460 name: icelandic_sentences_gec__dev__v0
461 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_modern_poem_identification/samples.jsonl
462 name: chinese_modern_poem_identification__test__v1
463 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/reverse_string/reverse_string.jsonl
464 name: reverse_string__s1__simple_v0
465 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/complex-analogies-en-ru/samples.jsonl
466 name: complex_analogies_en_ru__dev__v0
467 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/positive-binary-operations/samples.jsonl
468 name: positive_binary_operations__test__v1
469 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hindi_shuddha/samples.jsonl
470 name: hindi_shuddha__dev__v0
471 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/tokyo-station-number/samples.jsonl
472 name: tokyo_station_number__dev__v0
473 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_famous_novel/samples.jsonl
474 name: chinese_famous_novel__dev__v0
475 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/diagrammatic_logic/samples.jsonl
476 name: diagrammatic_logic__dev__v2
477 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/polish-lexicon/samples.jsonl
478 name: polish_lexicon__dev__v0
479 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/wkt_understanding/samples.jsonl
480 name: wkt_understanding__dev__v0
481 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese-national-medical-exam02/japanese-national-medical-exam02.jsonl
482 name: japanese_national_medical_exam02__dev__v0
483 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cardinal-directions/samples.jsonl
484 name: cardinal_directions__dev__v0
485 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rectangles/samples.jsonl
486 name: rectangles__dev__v0
487 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hindi_upsc/samples.jsonl
488 name: hindi_upsc__dev__v0
489 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/three-pt-mapping/three_pt_mapping.jsonl
490 name: three_pt_mapping__dev__v0
491 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/polish-proverbs/samples.jsonl
492 name: polish_proverbs__dev__v0
493 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/indonesian_numbers/indonesian_numbers.jsonl
494 name: indonesian_numbers__dev__v0
495 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_song_ci/samples.jsonl
496 name: chinese_song_ci__dev__v0
497 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cybersecurity/filepaths.jsonl
498 name: cybersecurity_filepaths__dev__v0
499 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/taxes/samples.jsonl
500 name: taxes__dev__v0
501 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/crontab/samples.jsonl
502 name: crontab__dev__v0
503 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/integer-sequence-predictions/misc-and-recent-sequences.jsonl
504 name: integer_sequence_predictions_misc__dev__v0
505 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/integer-sequence-predictions/obscure-sequences.jsonl
506 name: integer_sequence_predictions_obscure__dev__v0
507 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/integer-sequence-predictions/notable-sequences.jsonl
508 name: integer_sequence_predictions_notable__dev__v0
509 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/integer-sequence-predictions/samples.jsonl
510 name: integer_sequence_predictions__dev__v0
511 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_orthography/samples.jsonl
512 name: belarusian_orthography__dev__v0
513 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/date-booking/samples.jsonl
514 name: date_booking__dev__v0
515 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/interlingual-homograph/samples.jsonl
516 name: interlingual_homograph__dev__v0
517 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stats-tests/samples.jsonl
518 name: stats_tests__dev__v0
519 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_russian_translation/samples.jsonl
520 name: belarusian_russian_translation__dev__v0
521 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/date-calculator/samples.jsonl
522 name: date_calculator__test__v1
523 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_poem/samples.jsonl
524 name: chinese_poem__dev__v0
525 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_lexicon/samples.jsonl
526 name: belarusian_lexicon__dev__v0
527 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/test_english_pronunciations/samples.jsonl
528 name: test_english_pronunciations__dev__v0
529 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/anagrams/samples.jsonl
530 name: anagrams__test__v1
531 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/guess_the_singer/samples.jsonl
532 name: guess_the_singer__dev__v0
533 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/illinois-law/samples.jsonl
534 name: illinois_law__v0
535 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian_medical/samples.jsonl
536 name: russian_medical__dev__v0
537 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/bigrams/samples.jsonl
538 name: bigrams__dev__v0
539 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/probability_questions/probability_questions.jsonl
540 name: probability_questions__dev__v0
541 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/vintage_phone_keyboard_decode/samples.jsonl
542 name: vintage_phone_keyboard_decode__dev__v0
543 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/connect4/samples.jsonl
544 name: connect4__s1__v1
545 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_bull_call_spread.jsonl
546 name: stock_options_bull_call_spread__dev__v0
547 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_bear_call_spread.jsonl
548 name: stock_options_bear_call_spread__dev__v0
549 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_bear_call_spread.jsonl
550 name: stock_option_terms_bear_call_spread__dev__v0
551 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_iron_butterfly_spread.jsonl
552 name: stock_option_terms_iron_butterfly_spread__dev__v0
553 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_bull_call_spread.jsonl
554 name: stock_option_terms_bull_call_spread__dev__v0
555 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_inverse_iron_condor_spread.jsonl
556 name: stock_options_inverse_iron_condor_spread__dev__v0
557 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_iron_condor_spread.jsonl
558 name: stock_options_iron_condor_spread__dev__v0
559 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_iron_condor_spread.jsonl
560 name: stock_option_terms_iron_condor_spread__dev__v0
561 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_inverse_iron_butterfly_spread.jsonl
562 name: stock_options_inverse_iron_butterfly_spread__dev__v0
563 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_inverse_iron_condor_spread.jsonl
564 name: stock_option_terms_inverse_iron_condor_spread__dev__v0
565 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese_romantic_context/samples.jsonl
566 name: japanese_romantic_context__dev__v0
567 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/phonetics-identify-words-needing-missing-gpcs/samples.jsonl
568 name: phonetics_identify_words_needing_missing_gpcs__s1__simple_v0
569 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/prompt-injection/samples.jsonl
570 name: prompt_injection__dev__v0
571 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/word_vector_over_reliance/word_vector_over_reliance_samples.jsonl
572 name: word_vector_over_reliance__dev__simple_v0
573 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/lunar_calendar/iso_to_lunar_calendar.jsonl
574 name: iso_to_lunar_calendar__dev__v0
575 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/lunar_calendar/lunar_calendar_to_iso.jsonl
576 name: lunar_calendar_to_iso__dev__v0
577 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/code_combination/samples.jsonl
578 name: code_combination__dev__v0
579 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/partially_solved_crossword_clues/samples.jsonl
580 name: partially_solved_crossword_clues__dev__v0
581 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/quartz/samples.jsonl
582 name: quartz__test__v1
583 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/physics-interaction/samples.jsonl
584 name: physics__interaction__dev__v0
585 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/next-val-series/next-val-series.jsonl
586 name: next_val_series__dev__simple_v0
587evals:
588 # - name: actors-sequence
589 # dataset: actors_sequence__dev__match_v1
590 # scorers:
591 # - match
592 # - name: adultery_state_laws
593 # dataset: adultery_state_laws__dev__v0
594 # scorers:
595 # - match
596 # - name: proofreader
597 # dataset: proofreader__dev__v0
598 # scorers:
599 # - match
600 # - name: rock-climbing
601 # dataset: rock_climbing__dev__v0
602 # scorers:
603 # - match
604 # - name: match_banking77
605 # dataset: match_banking77__test__v1
606 # scorers:
607 # - match
608 # - name: ukraine-gec-grammar-prep
609 # dataset: ukraine_gec_grammar_prep__dev__v0
610 # scorers:
611 # - match
612 # - name: ukraine-gec-grammar-case
613 # dataset: ukraine_gec_grammar_case__dev__v0
614 # scorers:
615 # - match
616 # - name: ukraine-gec-grammar-gender
617 # dataset: ukraine_gec_grammar_gender__dev__v0
618 # scorers:
619 # - match
620 # - name: ukraine-gec-grammar-partvoice
621 # dataset: ukraine_gec_grammar_partvoice__dev__v0
622 # scorers:
623 # - match
624 # - name: ukraine-gec-fluency-poorflow
625 # dataset: ukraine_gec_fluency_poorflow__dev__v0
626 # scorers:
627 # - match
628 # - name: ukraine-gec-grammar-verbvoice
629 # dataset: ukraine_gec_grammar_verbvoice__dev__v0
630 # scorers:
631 # - match
632 # - name: ukraine-gec-grammar-number
633 # dataset: ukraine_gec_grammar_number__dev__v0
634 # scorers:
635 # - match
636 # - name: ukraine-gec-fluency-repetition
637 # dataset: ukraine_gec_fluency_repetition__dev__v0
638 # scorers:
639 # - match
640 # - name: ukraine-gec-fluency-calque
641 # dataset: ukraine_gec_fluency_calque__dev__v0
642 # scorers:
643 # - match
644 # - name: ukraine-gec-grammar-verbaform
645 # dataset: ukraine_gec_grammar_verbaform__dev__v0
646 # scorers:
647 # - match
648 # - name: ukraine-gec-grammar-ungrammaticalstructure
649 # dataset: ukraine_gec_grammar_ungrammaticalstructure__dev__v0
650 # scorers:
651 # - match
652 # - name: ukraine-gec-grammar-other
653 # dataset: ukraine_gec_grammar_other__dev__v0
654 # scorers:
655 # - match
656 # - name: ukraine-gec-fluency-style
657 # dataset: ukraine_gec_fluency_style__dev__v0
658 # scorers:
659 # - match
660 # - name: ukraine-gec-fluency-other
661 # dataset: ukraine_gec_fluency_other__dev__v0
662 # scorers:
663 # - match
664 # - name: ukraine-gec-grammar-conjunction
665 # dataset: ukraine_gec_grammar_conjunction__dev__v0
666 # scorers:
667 # - match
668 # - name: ukraine-gec-grammar-comparison
669 # dataset: ukraine_gec_grammar_comparison__dev__v0
670 # scorers:
671 # - match
672 # - name: ukraine-gec-grammar-tense
673 # dataset: ukraine_gec_grammar_tense__dev__v0
674 # scorers:
675 # - match
676 # - name: ukraine-gec-grammar-aspect
677 # dataset: ukraine_gec_grammar_aspect__dev__v0
678 # scorers:
679 # - match
680 # - name: irish-plural-nouns
681 # dataset: irish_plural_nouns__dev__v0
682 # scorers:
683 # - match
684 # - name: shape-in-shape
685 # dataset: shape_in_shape__dev__v1
686 # scorers:
687 # - match
688 # - name: russian_sarcasm
689 # dataset: russian_sarcasm__dev__v0
690 # scorers:
691 # - match
692 # - name: syllables_long_words
693 # dataset: syllables__dev__v1
694 # scorers:
695 # - match
696 # - name: crepe
697 # dataset: crepe__dev__v2
698 # scorers:
699 # - match
700 # - name: coq-proof-step-match
701 # dataset: coq_proof_step_match__dev__v0
702 # scorers:
703 # - match
704 # - name: ukraine-eit
705 # dataset: ukraine_eit__val__v0
706 # scorers:
707 # - match
708 # - name: belarusian-proverbs
709 # dataset: belarusian_proverbs__dev__v0
710 # scorers:
711 # - match
712 # - name: invoices
713 # dataset: invoices__dev__v0
714 # scorers:
715 # - match
716 # - name: urdu-lexicon
717 # dataset: urdu_lexicon__dev__v0
718 # scorers:
719 # - match
720 # - name: qa
721 # dataset: qa__dev__v0
722 # scorers:
723 # - match
724 # - name: french-part-of-speech
725 # dataset: french_part_of_speech__dev__v0
726 # scorers:
727 # - match
728 # - name: internal_representations
729 # dataset: internal_representations__dev__v0
730 # scorers:
731 # - match
732 # - name: python_list_comprehension
733 # dataset: python_list_comprehension__dev__v0
734 # scorers:
735 # - match
736 # - name: nepali-numerals
737 # dataset: nepali_numerals__dev__v0
738 # scorers:
739 # - match
740 # - name: belarusian-syllable-count
741 # dataset: belarusian_syllable_count__dev__v0
742 # scorers:
743 # - match
744 # - name: mandaliof-table
745 # dataset: mandaliof_table__dev__v0
746 # scorers:
747 # - match
748 # - name: test_japanese_english_numerals
749 # dataset: test_japanese_english_numerals__dev__v0
750 # scorers:
751 # - match
752 # - name: tracking-shuffled-objects
753 # dataset: tracking_shuffled_objects__dev__v0
754 # scorers:
755 # - match
756 # - name: squares-gpt
757 # dataset: squares_gpt__dev__v0
758 # scorers:
759 # - match
760 # - name: convert-hex-hsl-lightness
761 # dataset: convert_hex_hsl_lightness__dev__v0
762 # scorers:
763 # - match
764 # - name: russe
765 # dataset: russe__test__v0
766 # scorers:
767 # - match
768 # - name: aba_mrpc_true_false
769 # dataset: aba_mrpc_true_false__dev__v0
770 # scorers:
771 # - match
772 # - name: logical_counting
773 # dataset: logical_counting__dev__v0
774 # scorers:
775 # - match
776 # - name: vigenere
777 # dataset: vigenere__s1__simple_v0
778 # scorers:
779 # - match
780 # - name: map-electronic-component-part-to-fact
781 # dataset: map_electronic_component_part_to_fact__dev__v0
782 # scorers:
783 # - match
784 # - name: rare-and-loanwords-dutch-lexicon
785 # dataset: rare_and_loanwords_dutch_lexicon__dev__v0
786 # scorers:
787 # - match
788 # - name: product_information_extraction_one_shot
789 # dataset: product_information_extraction_one_shot__dev__v0
790 # scorers:
791 # - match
792 # - name: product_information_extraction_zero_shot
793 # dataset: product_information_extraction_zero_shot__dev__v0
794 # scorers:
795 # - match
796 # - name: sort-numbers
797 # dataset: sort_numbers__s1__simple_v0
798 # scorers:
799 # - match
800 # - name: match_product-matching_zeroshot
801 # dataset: match_product_matching_zeroshot__dev__v1
802 # scorers:
803 # - match
804 # - name: match_product-matching_fewshot
805 # dataset: match_product_matching_fewshot__dev__v1
806 # scorers:
807 # - match
808 # - name: match_product-matching_rules
809 # dataset: match_product_matching_rules__dev__v1
810 # scorers:
811 # - match
812 # - name: russian-lexicon
813 # dataset: russian_lexicon__dev__v0
814 # scorers:
815 # - match
816 # - name: dutch-lexicon
817 # dataset: dutch_lexicon__dev__v0
818 # scorers:
819 # - match
820 # - name: greek-nt-manuscripts
821 # dataset: greek_nt_manuscripts__v0
822 # scorers:
823 # - match
824 # - name: matrix_mult_rows
825 # dataset: matrix_mult_rows__dev__v0
826 # scorers:
827 # - match
828 # - name: moral_exceptQA
829 # dataset: moral_exceptqa__test__v1
830 # scorers:
831 # - match
832 # - name: music-theory-triads-identification
833 # dataset: music_theory_triads_identification__dev__v0
834 # scorers:
835 # - match
836 # - name: music-theory-tetrads-identification
837 # dataset: music_theory_tetrads_identification__dev__v0
838 # scorers:
839 # - match
840 # - name: find-thirukkural
841 # dataset: find_thirukkural__dev__v0
842 # scorers:
843 # - match
844 # - name: building_floorplan
845 # dataset: building_floorplan__test__v1
846 # scorers:
847 # - match
848 # - name: japanese-national-medical-exam01
849 # dataset: japanese_national_medical_exam01__dev__v0
850 # scorers:
851 # - match
852 # - name: lat_long_identify
853 # dataset: lat_long_identify__dev__v0
854 # scorers:
855 # - match
856 # - name: norwegian-lexicon
857 # dataset: norwegian_lexicon__dev__v0
858 # scorers:
859 # - match
860 # - name: german-part-of-speech
861 # dataset: german_part_of_speech__dev__v0
862 # scorers:
863 # - match
864 # - name: swedish_sat
865 # dataset: swedish_sat__dev__v0
866 # scorers:
867 # - match
868 # - name: utility_price_parsing
869 # dataset: utility_price_parsing__dev__v0
870 # scorers:
871 # - match
872 # - name: korean-consonant-vowel-combination
873 # dataset: korean_consonant_vowel_combination__dev__v0
874 # scorers:
875 # - match
876 # - name: mate-in-one
877 # dataset: mate_in_one__dev__v0
878 # scorers:
879 # - match
880 # - name: french-lexicon
881 # dataset: french_lexicon__dev__v0
882 # scorers:
883 # - match
884 # - name: swedish-spelling
885 # dataset: swedish_spelling__dev__v0
886 # scorers:
887 # - match
888 # - name: knot-theory-unknotting-number
889 # dataset: knot_theory_unknotting_number__dev__v0
890 # scorers:
891 # - match
892 # - name: knot-theory-unknotting-problem
893 # dataset: knot_theory_unknotting_problem__dev__v0
894 # scorers:
895 # - match
896 # - name: knot-theory-code-conversion
897 # dataset: knot_theory_code_conversion__dev__v0
898 # scorers:
899 # - match
900 # - name: hindi_words
901 # dataset: hindi_words__dev__v0
902 # scorers:
903 # - match
904 # - name: arithmetical_puzzles
905 # dataset: arithmetical_puzzles__dev__v0
906 # scorers:
907 # - match
908 # - name: belarusian-antonyms
909 # dataset: belarusian_antonyms__dev__v0
910 # scorers:
911 # - match
912 # - name: body-movement
913 # dataset: body_movement__dev__zero_shot_v0
914 # scorers:
915 # - match
916 # - name: afrikaans-lexicon
917 # dataset: afrikaans_lexicon__dev__v0
918 # scorers:
919 # - match
920 # - name: cricket_situations
921 # dataset: cricket_situations__dev__v0
922 # scorers:
923 # - match
924 # - name: korean_spelling
925 # dataset: korean_spelling__dev__v0
926 # scorers:
927 # - match
928 # - name: rucola
929 # dataset: rucola__test__v0
930 # scorers:
931 # - match
932 # - name: reclor-logical-reasoning-plus
933 # dataset: reclor_logical_reasoning_plus__dev__v0
934 # scorers:
935 # - match
936 # - name: logiqav2-logical-reasoning-plus
937 # dataset: logiqav2_logical_reasoning_plus__dev__v0
938 # scorers:
939 # - match
940 # - name: logiqa-logical-reasoning-plus
941 # dataset: logiqa_logical_reasoning_plus__dev__v0
942 # scorers:
943 # - match
944 # - name: medmcqa
945 # dataset: medmcqa__dev__v0
946 # scorers:
947 # - match
948 # - name: multi-step-equations
949 # dataset: multi_step_equations__dev__v0
950 # scorers:
951 # - match
952 # - name: japanese-remote-island-to-prefecture
953 # dataset: japanese_remote_island_to_prefecture__dev__v0
954 # scorers:
955 # - match
956 # - name: chinese_homonym
957 # dataset: chinese_homonym__dev__v0
958 # scorers:
959 # - match
960 # - name: norwegian-rhymes
961 # dataset: norwegian_rhymes__dev__v0
962 # scorers:
963 # - match
964 # - name: chinese_shi_jing
965 # dataset: chinese_shi_jing__test__v1
966 # scorers:
967 # - match
968 # - name: forth-stack-sim-basic
969 # dataset: forth_stack_sim_basic__dev__v0
970 # scorers:
971 # - match
972 # - name: forth-stack-sim
973 # dataset: forth_stack_sim__dev__v0
974 # scorers:
975 # - match
976 # - name: forth-stack-sim-detailed
977 # dataset: forth_stack_sim_detailed__dev__v0
978 # scorers:
979 # - match
980 # - name: japanese_city_name_pronunciation
981 # dataset: japanese_city_name_pronunciation__dev__v0
982 # scorers:
983 # - match
984 # - name: escher-sentences
985 # dataset: escher_sentences__dev__v0
986 # scorers:
987 # - match
988 # - name: track_objects
989 # dataset: track_objects__dev__v0
990 # scorers:
991 # - match
992 # - name: shopping_discount_comparison
993 # dataset: shopping_discount_comparison__dev__v0
994 # scorers:
995 # - match
996 # - name: computer-science-problems
997 # dataset: computer_science_problems__s1__simple_v0
998 # scorers:
999 # - match
1000 # - name: mendelian_inheritance
1001 # dataset: mendelian_inheritance__dev__v0
1002 # scorers:
1003 # - match
1004 # - name: override-system-instruction
1005 # dataset: override_system_instruction__dev__v0
1006 # scorers:
1007 # - match
1008 # - name: hand_ranks-match
1009 # dataset: hand_ranks__test__v1
1010 # scorers:
1011 # - match
1012 # - name: diabetes
1013 # dataset: diabetes__dev__v0
1014 # scorers:
1015 # - match
1016 # - name: job_listing_title_for_a_caregiver_in_japan
1017 # dataset: job_listing_title_for_a_caregiver_in_japan__test__v1
1018 # scorers:
1019 # - match
1020 # - name: poker_analysis
1021 # dataset: poker_analysis__test__v1
1022 # scorers:
1023 # - match
1024 # - name: belarusian-numerals
1025 # dataset: belarusian_numerals__dev__v0
1026 # scorers:
1027 # - match
1028 # - name: algebra-word-problems
1029 # dataset: algebra_word_problems__s1__simple_v0
1030 # scorers:
1031 # - match
1032 # - name: belarusian-grammar
1033 # dataset: belarusian_grammar__dev__v0
1034 # scorers:
1035 # - match
1036 # - name: svg_understanding
1037 # dataset: svg_understanding__v0
1038 # scorers:
1039 # - match
1040 # - name: cissp-study-questions
1041 # dataset: cissp_study_questions__test__v1
1042 # scorers:
1043 # - match
1044 # - name: linear-equations
1045 # dataset: linear_equations__dev__v0
1046 # scorers:
1047 # - match
1048 # - name: japanese_driving_license
1049 # dataset: japanese_driving_license__s1__simple_v0
1050 # scorers:
1051 # - match
1052 # - name: first-letters
1053 # dataset: first_letters__dev__v0
1054 # scorers:
1055 # - match
1056 # - name: arc
1057 # dataset: arc__dev__v0
1058 # scorers:
1059 # - match
1060 # - name: css-selectors-verbal
1061 # dataset: css_selectors_verbal__dev__v0
1062 # scorers:
1063 # - match
1064 # - name: japanese-itpassport-exam01
1065 # dataset: japanese_itpassport_exam01__dev__v0
1066 # scorers:
1067 # - match
1068 # - name: logiqa
1069 # dataset: logiqa__dev__v0
1070 # scorers:
1071 # - match
1072 # - name: chinese_zodiac
1073 # dataset: chinese_zodiac__dev__v0
1074 # scorers:
1075 # - match
1076 # - name: spanish-lexicon
1077 # dataset: spanish_lexicon__dev__v0
1078 # scorers:
1079 # - match
1080 # - name: food
1081 # dataset: food__test__v1
1082 # scorers:
1083 # - match
1084 # - name: countries
1085 # dataset: countries__dev__v0
1086 # scorers:
1087 # - match
1088 # - name: which-is-heavier
1089 # dataset: which_is_heavier__dev__v0
1090 # scorers:
1091 # - match
1092 # - name: korean_date_counting
1093 # dataset: korean_date_counting__dev__v0
1094 # scorers:
1095 # - match
1096 # - name: fcc_amateur_extra
1097 # dataset: fcc_amateur_extra__dev__v0
1098 # scorers:
1099 # - match
1100 # - name: multistep-word-problems
1101 # dataset: multistep_word_problems__dev__v0
1102 # scorers:
1103 # - match
1104 # - name: list_comparison_missing_name
1105 # dataset: list_comparison_missing_name__dev__v0
1106 # scorers:
1107 # - match
1108 # - name: newsology
1109 # dataset: newsology__dev__v0
1110 # scorers:
1111 # - match
1112 # - name: simple-visual-understanding
1113 # dataset: simple_visual_understanding__dev__v0
1114 # scorers:
1115 # - match
1116 # - name: portuguese-syllable-count
1117 # dataset: portuguese_syllable_count__dev__v0
1118 # scorers:
1119 # - match
1120 # - name: south-african-bands
1121 # dataset: south_african_bands__dev__v0
1122 # scorers:
1123 # - match
1124 # - name: hebrew-plurals
1125 # dataset: hebrew_plurals__dev__v0
1126 # scorers:
1127 # - match
1128 # - name: rot13
1129 # dataset: rot13__s1__simple_v0
1130 # scorers:
1131 # - match
1132 # - name: korean_dialects
1133 # dataset: korean_dialects__dev__v0
1134 # scorers:
1135 # - match
1136 # - name: test-time-zone-conversion
1137 # dataset: test_time_zone_conversion__dev__v0
1138 # scorers:
1139 # - match
1140 # - name: music-theory-chord-notes
1141 # dataset: music_theory_chord_notes__dev__v0
1142 # scorers:
1143 # - match
1144 # - name: russian-english-homonym-context-resolution
1145 # dataset: russian_english_homonym_context_resolution__dev__v0
1146 # scorers:
1147 # - match
1148 # - name: number-reading
1149 # dataset: number_reading__dev__v0
1150 # scorers:
1151 # - match
1152 # - name: simple-knowledge-mongolian
1153 # dataset: simple_knowledge_mongolian__dev__v0
1154 # scorers:
1155 # - match
1156 # - name: base64-decode
1157 # dataset: base64_decode_simple__dev__v0
1158 # scorers:
1159 # - match
1160 # - name: urdu-transliteration
1161 # dataset: urdu_transliteration__dev__v0
1162 # scorers:
1163 # - match
1164 # - name: reverse-polish-notation
1165 # dataset: reverse_polish_notation__dev__v0
1166 # scorers:
1167 # - match
1168 # - name: music-theory-chord-names
1169 # dataset: music_theory_chord_names__dev__v0
1170 # scorers:
1171 # - match
1172 # - name: born-first
1173 # dataset: born_first__dev__v0
1174 # scorers:
1175 # - match
1176 # - name: tetris
1177 # dataset: tetris__dev__v0
1178 # scorers:
1179 # - match
1180 # - name: pure_korean
1181 # dataset: pure_korean__dev__v0
1182 # scorers:
1183 # - match
1184 # - name: determinant
1185 # dataset: determinant__test__v1
1186 # scorers:
1187 # - match
1188 # - name: split_chinese_characters
1189 # dataset: split_chinese_characters__dev__v0
1190 # scorers:
1191 # - match
1192 # - name: syntax-check
1193 # dataset: syntax_check__dev__v1
1194 # scorers:
1195 # - match
1196 # - name: balance-chemical-equation
1197 # dataset: balance_chemical_equation__dev__v0
1198 # scorers:
1199 # - match
1200 # - name: emotional-intelligence
1201 # dataset: emotional_intelligence__dev__v0
1202 # scorers:
1203 # - match
1204 # - name: nutrition
1205 # dataset: nutrition__dev__v0
1206 # scorers:
1207 # - match
1208 # - name: reverse-sort-words-eng
1209 # dataset: reverse_sort_words_eng_simple__dev__v0
1210 # scorers:
1211 # - match
1212 # - name: day-of-week-from-date
1213 # dataset: day_of_week_from_date__dev__v0
1214 # scorers:
1215 # - match
1216 # - name: regex-match
1217 # dataset: regex__match__dev__v0
1218 # scorers:
1219 # - match
1220 # - name: find-letter
1221 # dataset: find_letter__dev__v0
1222 # scorers:
1223 # - match
1224 # - name: korean_foreign_words
1225 # dataset: korean_foreign_words__dev__v0
1226 # scorers:
1227 # - match
1228 # - name: greek-vocabulary
1229 # dataset: greek_vocabulary__dev__v0
1230 # scorers:
1231 # - match
1232 # - name: rubiks-colors
1233 # dataset: rubiks_colors__dev__v0
1234 # scorers:
1235 # - match
1236 # - name: decrypt-caesar-cipher
1237 # dataset: decrypt_caesar_cipher__dev__v0
1238 # scorers:
1239 # - match
1240 # - name: us-tort-law
1241 # dataset: us_tort_law__dev__v0
1242 # scorers:
1243 # - match
1244 # - name: number-pattern
1245 # dataset: number_pattern__dev__v0
1246 # scorers:
1247 # - match
1248 # - name: confusing_korean
1249 # dataset: confusing_korean__dev__v0
1250 # scorers:
1251 # - match
1252 # - name: kanji-idioms
1253 # dataset: kanji_idioms__test__v0
1254 # scorers:
1255 # - match
1256 # - name: missing-operators
1257 # dataset: missing_operators__s1__simple_v0
1258 # scorers:
1259 # - match
1260 # - name: unsolvable_questions
1261 # dataset: unsolvable_questions__dev__v0
1262 # scorers:
1263 # - match
1264 # - name: portuguese-sarcasm
1265 # dataset: portuguese_sarcasm__dev__v0
1266 # scorers:
1267 # - match
1268 # - name: swap-words
1269 # dataset: swap_words__dev__v0
1270 # scorers:
1271 # - match
1272 # - name: hebrew-same-noun-gender
1273 # dataset: hebrew_same_noun_gender__v0
1274 # scorers:
1275 # - match
1276 # - name: heart-disease
1277 # dataset: heart_disease__v0
1278 # scorers:
1279 # - match
1280 # - name: last-word-nth
1281 # dataset: last_word_nth__s1__simple_v0
1282 # scorers:
1283 # - match
1284 # - name: ascii-wordart
1285 # dataset: ascii_wordart__dev__v0
1286 # scorers:
1287 # - match
1288 # - name: direct-speech-tag
1289 # dataset: direct_speech_tag__dev__v0
1290 # scorers:
1291 # - match
1292 # - name: italian-new-words
1293 # dataset: italian_new_words__dev__v0
1294 # scorers:
1295 # - match
1296 # - name: irony
1297 # dataset: irony__dev__v0
1298 # scorers:
1299 # - match
1300 # - name: math_polish
1301 # dataset: math_polish__dev__v0
1302 # scorers:
1303 # - match
1304 # - name: irish-lexicon
1305 # dataset: irish_lexicon__dev__v0
1306 # scorers:
1307 # - match
1308 # - name: canto_wu_pronunciation
1309 # dataset: canto_wu_pronunciation__dev__v0
1310 # scorers:
1311 # - match
1312 # - name: irrelevant-negative-diversion
1313 # dataset: irrelevant_negative_diversion__dev__v0
1314 # scorers:
1315 # - match
1316 # - name: invert_word_wise
1317 # dataset: invert_word_wise__dev__v0
1318 # scorers:
1319 # - match
1320 # - name: imperial_date_to_string
1321 # dataset: imperial_date_to_string__dev__v0
1322 # scorers:
1323 # - match
1324 # - name: gujarati-numerals
1325 # dataset: gujarati_numerals__dev__v0
1326 # scorers:
1327 # - match
1328 # - name: count_token_freq_dna
1329 # dataset: count_token_freq_dna__dev__v0
1330 # scorers:
1331 # - match
1332 # - name: french_homonym_and_homograph
1333 # dataset: french_homonym_and_homograph__dev__v0
1334 # scorers:
1335 # - match
1336 - name: cube-pack
1337 dataset: cube_pack__dev__v0
1338 scorers:
1339 - match
1340 - name: historical-kana-orthography-reading
1341 dataset: historical_kana_orthography_reading__dev__v0
1342 scorers:
1343 - match
1344 - name: canto_wu_pronunciation_fewshot
1345 dataset: canto_wu_pronunciation_fewshot__dev__v0
1346 scorers:
1347 - match
1348 - name: accounting_audit
1349 dataset: accounting_audit__dev__v0
1350 scorers:
1351 - match
1352 - name: brazilian-lexicon
1353 dataset: brazilian_lexicon__dev__v0
1354 scorers:
1355 - match
1356 - name: naughty_strings
1357 dataset: naughty_strings__test__v1
1358 scorers:
1359 - match
1360 - name: korean-phonetics
1361 dataset: korean_phonetics__dev__v0
1362 scorers:
1363 - match
1364 - name: chinese-homo
1365 dataset: chinese_homophonic__dev__v0
1366 scorers:
1367 - match
1368 - name: count_intersections_polynomial
1369 dataset: count_intersections_polynomial__dev__v0
1370 scorers:
1371 - match
1372 - name: coqa-match
1373 dataset: coqa_match__dev__v0
1374 scorers:
1375 - match
1376 - name: latin-grammar
1377 dataset: latin_grammar__dev__v0
1378 scorers:
1379 - match
1380 - name: bitwise
1381 dataset: bitwise__dev__v0
1382 scorers:
1383 - match
1384 - name: shared-borders
1385 dataset: shared_borders__dev__v0
1386 scorers:
1387 - match
1388 - name: japanese-station
1389 dataset: japanese_station__dev__v0
1390 scorers:
1391 - match
1392 - name: atpl_exams
1393 dataset: atpl_exams__dev__v0
1394 scorers:
1395 - match
1396 - name: invoice_due_date_leap_day_adjustment
1397 dataset: invoice_due_date_leap_day_adjustment__dev__v0
1398 scorers:
1399 - match
1400 - name: romanian_homonyms
1401 dataset: romanian_homonyms__dev__v0
1402 scorers:
1403 - match
1404 - name: infiniteloop-match
1405 dataset: infiniteloop_match__s1__simple_v0
1406 scorers:
1407 - match
1408 - name: russian-nlp-tasks
1409 dataset: russian_nlp_tasks__dev__v0
1410 scorers:
1411 - match
1412 - name: chinese_chu_ci
1413 dataset: chinese_chu_ci__dev__v0
1414 scorers:
1415 - match
1416 - name: polish-syllable-count
1417 dataset: polish_syllable_count__val__v0
1418 scorers:
1419 - match
1420 - name: korean-postposition
1421 dataset: korean_postposition__dev__v0
1422 scorers:
1423 - match
1424 - name: bulgarian-lexicon
1425 dataset: bulgarian_lexicon__dev__v0
1426 scorers:
1427 - match
1428 - name: compare-countries-area
1429 dataset: compare_countries_area__dev__v0
1430 scorers:
1431 - match
1432 - name: pattern_identification
1433 dataset: pattern_identification__dev__v0
1434 scorers:
1435 - match
1436 - name: belarusian-synonyms
1437 dataset: belarusian_synonyms__dev__v0
1438 scorers:
1439 - match
1440 - name: spanish_feminine_noun_masculine_article
1441 dataset: spanish_feminine_noun_masculine_article__dev__v0
1442 scorers:
1443 - match
1444 - name: sarcasm
1445 dataset: sarcasm__test__v1
1446 scorers:
1447 - match
1448 - name: chinese_tang_poetries
1449 dataset: chinese_tang_poetries__dev__match_v1
1450 scorers:
1451 - match
1452 - name: japanese-number-reading
1453 dataset: japanese_number_reading__dev__v0
1454 scorers:
1455 - match
1456 - name: korean-honorific
1457 dataset: korean_honorific__dev__v0
1458 scorers:
1459 - match
1460 - name: complex-replace-characters
1461 dataset: complex_replace_characters__dev__v0
1462 scorers:
1463 - match
1464 - name: dice-rotation-sequence
1465 dataset: dice_rotation_sequence__dev__v0
1466 scorers:
1467 - match
1468 - name: utah_real_estateh
1469 dataset: utah_real_estate__dev__v0
1470 scorers:
1471 - match
1472 - name: formal-logic
1473 dataset: formal_logic__dev__v0
1474 scorers:
1475 - match
1476 - name: resistor-ohm-calculator
1477 dataset: resistor_ohm_calculator__dev__simple_v0
1478 scorers:
1479 - match
1480 - name: gol
1481 dataset: gol__dev__v1
1482 scorers:
1483 - match
1484 - name: icelandic-sentences-gec
1485 dataset: icelandic_sentences_gec__dev__v0
1486 scorers:
1487 - match
1488 - name: chinese_modern_poem_identification
1489 dataset: chinese_modern_poem_identification__test__v1
1490 scorers:
1491 - match
1492 - name: reverse-string
1493 dataset: reverse_string__s1__simple_v0
1494 scorers:
1495 - match
1496 - name: complex-analogies-en-ru
1497 dataset: complex_analogies_en_ru__dev__v0
1498 scorers:
1499 - match
1500 - name: positive-binary-operations
1501 dataset: positive_binary_operations__test__v1
1502 scorers:
1503 - match
1504 - name: hindi_shuddha
1505 dataset: hindi_shuddha__dev__v0
1506 scorers:
1507 - match
1508 - name: tokyo-station-number
1509 dataset: tokyo_station_number__dev__v0
1510 scorers:
1511 - match
1512 - name: chinese_famous_novel
1513 dataset: chinese_famous_novel__dev__v0
1514 scorers:
1515 - match
1516 - name: diagrammatic_logic
1517 dataset: diagrammatic_logic__dev__v2
1518 scorers:
1519 - match
1520 - name: polish-lexicon
1521 dataset: polish_lexicon__dev__v0
1522 scorers:
1523 - match
1524 - name: wkt_understanding
1525 dataset: wkt_understanding__dev__v0
1526 scorers:
1527 - match
1528 - name: japanese-national-medical-exam02
1529 dataset: japanese_national_medical_exam02__dev__v0
1530 scorers:
1531 - match
1532 - name: cardinal-directions
1533 dataset: cardinal_directions__dev__v0
1534 scorers:
1535 - match
1536 - name: rectangles
1537 dataset: rectangles__dev__v0
1538 scorers:
1539 - match
1540 - name: hindi_upsc
1541 dataset: hindi_upsc__dev__v0
1542 scorers:
1543 - match
1544 - name: three-pt-mapping
1545 dataset: three_pt_mapping__dev__v0
1546 scorers:
1547 - match
1548 - name: polish-proverbs
1549 dataset: polish_proverbs__dev__v0
1550 scorers:
1551 - match
1552 - name: indonesian_numbers
1553 dataset: indonesian_numbers__dev__v0
1554 scorers:
1555 - match
1556 - name: chinese_song_ci
1557 dataset: chinese_song_ci__dev__v0
1558 scorers:
1559 - match
1560 - name: cybersecurity-filepaths
1561 dataset: cybersecurity_filepaths__dev__v0
1562 scorers:
1563 - match
1564 - name: taxes
1565 dataset: taxes__dev__v0
1566 scorers:
1567 - match
1568 - name: crontab
1569 dataset: crontab__dev__v0
1570 scorers:
1571 - match
1572 - name: integer-sequence-predictions-misc
1573 dataset: integer_sequence_predictions_misc__dev__v0
1574 scorers:
1575 - match
1576 - name: integer-sequence-predictions-obscure
1577 dataset: integer_sequence_predictions_obscure__dev__v0
1578 scorers:
1579 - match
1580 - name: integer-sequence-predictions-notable
1581 dataset: integer_sequence_predictions_notable__dev__v0
1582 scorers:
1583 - match
1584 - name: integer-sequence-predictions
1585 dataset: integer_sequence_predictions__dev__v0
1586 scorers:
1587 - match
1588 - name: belarusian-orthography
1589 dataset: belarusian_orthography__dev__v0
1590 scorers:
1591 - match
1592 - name: date-booking
1593 dataset: date_booking__dev__v0
1594 scorers:
1595 - match
1596 - name: interlingual-homograph
1597 dataset: interlingual_homograph__dev__v0
1598 scorers:
1599 - match
1600 - name: stats-tests
1601 dataset: stats_tests__dev__v0
1602 scorers:
1603 - match
1604 - name: belarusian-russian-translation
1605 dataset: belarusian_russian_translation__dev__v0
1606 scorers:
1607 - match
1608 - name: date-calculator
1609 dataset: date_calculator__test__v1
1610 scorers:
1611 - match
1612 - name: chinese_poem
1613 dataset: chinese_poem__dev__v0
1614 scorers:
1615 - match
1616 - name: belarusian-lexicon
1617 dataset: belarusian_lexicon__dev__v0
1618 scorers:
1619 - match
1620 - name: test_english_pronunciations
1621 dataset: test_english_pronunciations__dev__v0
1622 scorers:
1623 - match
1624 - name: anagrams
1625 dataset: anagrams__test__v1
1626 scorers:
1627 - match
1628 - name: guess-the-singer
1629 dataset: guess_the_singer__dev__v0
1630 scorers:
1631 - match
1632 - name: illinois-law
1633 dataset: illinois_law__v0
1634 scorers:
1635 - match
1636 - name: russian_medical
1637 dataset: russian_medical__dev__v0
1638 scorers:
1639 - match
1640 - name: bigrams
1641 dataset: bigrams__dev__v0
1642 scorers:
1643 - match
1644 - name: probability-questions
1645 dataset: probability_questions__dev__v0
1646 scorers:
1647 - match
1648 - name: vintage_phone_keyboard_decode
1649 dataset: vintage_phone_keyboard_decode__dev__v0
1650 scorers:
1651 - match
1652 - name: connect4
1653 dataset: connect4__s1__v1
1654 scorers:
1655 - match
1656 - name: stock-options-bull-call-spread
1657 dataset: stock_options_bull_call_spread__dev__v0
1658 scorers:
1659 - match
1660 - name: stock-options-bear-call-spread
1661 dataset: stock_options_bear_call_spread__dev__v0
1662 scorers:
1663 - match
1664 - name: stock-option-terms-bear-call-spread
1665 dataset: stock_option_terms_bear_call_spread__dev__v0
1666 scorers:
1667 - match
1668 - name: stock-option-terms-iron-butteryfly-spread
1669 dataset: stock_option_terms_iron_butterfly_spread__dev__v0
1670 scorers:
1671 - match
1672 - name: stock-option-terms-bull-call-spread
1673 dataset: stock_option_terms_bull_call_spread__dev__v0
1674 scorers:
1675 - match
1676 - name: stock-options-inverse-iron-condor-spread
1677 dataset: stock_options_inverse_iron_condor_spread__dev__v0
1678 scorers:
1679 - match
1680 - name: stock-options-iron-condor-spread
1681 dataset: stock_options_iron_condor_spread__dev__v0
1682 scorers:
1683 - match
1684 - name: stock-option-terms-iron-condor-spread
1685 dataset: stock_option_terms_iron_condor_spread__dev__v0
1686 scorers:
1687 - match
1688 - name: stock-options-inverse-iron-butterfly-spread
1689 dataset: stock_options_inverse_iron_butterfly_spread__dev__v0
1690 scorers:
1691 - match
1692 - name: stock-option-terms-inverse-iron-condor-spread
1693 dataset: stock_option_terms_inverse_iron_condor_spread__dev__v0
1694 scorers:
1695 - match
1696 - name: japanese_romantic_context
1697 dataset: japanese_romantic_context__dev__v0
1698 scorers:
1699 - match
1700 - name: phonetics-identify-words-needing-missing-gpcs
1701 dataset: phonetics_identify_words_needing_missing_gpcs__s1__simple_v0
1702 scorers:
1703 - match
1704 - name: prompt-injection
1705 dataset: prompt_injection__dev__v0
1706 scorers:
1707 - match
1708 - name: word_vector_over_reliance
1709 dataset: word_vector_over_reliance__dev__simple_v0
1710 scorers:
1711 - match
1712 - name: iso-to-lunar-calendar
1713 dataset: iso_to_lunar_calendar__dev__v0
1714 scorers:
1715 - match
1716 - name: lunar-calendar-to-iso
1717 dataset: lunar_calendar_to_iso__dev__v0
1718 scorers:
1719 - match
1720 - name: code_combination
1721 dataset: code_combination__dev__v0
1722 scorers:
1723 - match
1724 - name: partially_solved_crossword_clues
1725 dataset: partially_solved_crossword_clues__dev__v0
1726 scorers:
1727 - match
1728 - name: quartz
1729 dataset: quartz__test__v1
1730 scorers:
1731 - match
1732 - name: physics-interaction
1733 dataset: physics__interaction__dev__v0
1734 scorers:
1735 - match
1736 - name: next-val-series
1737 dataset: next_val_series__dev__simple_v0
1738 scorers:
1739 - match
1740
1name: spicepod
2version: v1beta1
3kind: Spicepod
4
5# This file is generated from the evals & data in the repo, specifically using [evalconverter](https://github.com/spiceai/spiceai/tree/trunk/tools/evalconverter).
6# ```shell
7# evalconverter -i evals/registry/evals -b evals/registry/data
8# ```
9
10datasets:
11 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/actors-sequence/samples.jsonl
12 # name: actors_sequence__dev__match_v1
13 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/adultery-state-laws/samples.jsonl
14 # name: adultery_state_laws__dev__v0
15 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/proofreader/samples.jsonl
16 # name: proofreader__dev__v0
17 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rock-climbing/samples.jsonl
18 # name: rock_climbing__dev__v0
19 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/banking77/samples.jsonl
20 # name: match_banking77__test__v1
21 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_prep.jsonl
22 # name: ukraine_gec_grammar_prep__dev__v0
23 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_case.jsonl
24 # name: ukraine_gec_grammar_case__dev__v0
25 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_gender.jsonl
26 # name: ukraine_gec_grammar_gender__dev__v0
27 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_partvoice.jsonl
28 # name: ukraine_gec_grammar_partvoice__dev__v0
29 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_poorflow.jsonl
30 # name: ukraine_gec_fluency_poorflow__dev__v0
31 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_verbvoice.jsonl
32 # name: ukraine_gec_grammar_verbvoice__dev__v0
33 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_number.jsonl
34 # name: ukraine_gec_grammar_number__dev__v0
35 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_repetition.jsonl
36 # name: ukraine_gec_fluency_repetition__dev__v0
37 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_calque.jsonl
38 # name: ukraine_gec_fluency_calque__dev__v0
39 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_verbaform.jsonl
40 # name: ukraine_gec_grammar_verbaform__dev__v0
41 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_ungrammaticalstructure.jsonl
42 # name: ukraine_gec_grammar_ungrammaticalstructure__dev__v0
43 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_other.jsonl
44 # name: ukraine_gec_grammar_other__dev__v0
45 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_style.jsonl
46 # name: ukraine_gec_fluency_style__dev__v0
47 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_fluency_other.jsonl
48 # name: ukraine_gec_fluency_other__dev__v0
49 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_conjunction.jsonl
50 # name: ukraine_gec_grammar_conjunction__dev__v0
51 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_comparison.jsonl
52 # name: ukraine_gec_grammar_comparison__dev__v0
53 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_tense.jsonl
54 # name: ukraine_gec_grammar_tense__dev__v0
55 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_gec/ukraine_gec_grammar_aspect.jsonl
56 # name: ukraine_gec_grammar_aspect__dev__v0
57 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/irish_plural_nouns/samples.jsonl
58 # name: irish_plural_nouns__dev__v0
59 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/shape_in_shape/shape_in_shape.jsonl
60 # name: shape_in_shape__dev__v1
61 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian_sarcasm/samples.jsonl
62 # name: russian_sarcasm__dev__v0
63 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/syllables_long_words/long_word_samples.jsonl
64 # name: syllables__dev__v1
65 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/crepe/samples.jsonl
66 # name: crepe__dev__v2
67 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/coq-proof-step/match.jsonl
68 # name: coq_proof_step_match__dev__v0
69 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ukraine_eit/samples.jsonl
70 # name: ukraine_eit__val__v0
71 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_proverbs/samples.jsonl
72 # name: belarusian_proverbs__dev__v0
73 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/invoices/match.jsonl
74 # name: invoices__dev__v0
75 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/urdu-lexicon/samples.jsonl
76 # name: urdu_lexicon__dev__v0
77 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/qa/q_and_a.jsonl
78 # name: qa__dev__v0
79 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/french-part-of-speech/samples.jsonl
80 # name: french_part_of_speech__dev__v0
81 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/internal_representations/samples.jsonl
82 # name: internal_representations__dev__v0
83 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/python_list_comprehension/samples.jsonl
84 # name: python_list_comprehension__dev__v0
85 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/nepali_numerals/samples.jsonl
86 # name: nepali_numerals__dev__v0
87 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_syllable_count/samples.jsonl
88 # name: belarusian_syllable_count__dev__v0
89 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/mandaliof-table/samples.jsonl
90 # name: mandaliof_table__dev__v0
91 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/test_japanese_english_numerals/samples.jsonl
92 # name: test_japanese_english_numerals__dev__v0
93 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/tracking-shuffled-objects/samples.jsonl
94 # name: tracking_shuffled_objects__dev__v0
95 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/squares-gpt/square-samples.jsonl
96 # name: squares_gpt__dev__v0
97 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/convert-hex-hsl-lightness/samples.jsonl
98 # name: convert_hex_hsl_lightness__dev__v0
99 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russe/samples.jsonl
100 # name: russe__test__v0
101 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/aba_mrpc_true_false/samples.jsonl
102 # name: aba_mrpc_true_false__dev__v0
103 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logical_counting/samples.jsonl
104 # name: logical_counting__dev__v0
105 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/vigenere/samples.jsonl
106 # name: vigenere__s1__simple_v0
107 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/map-electronic-component-part-to-fact/samples.jsonl
108 # name: map_electronic_component_part_to_fact__dev__v0
109 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rare-and-loanwords-dutch-lexicon/samples.jsonl
110 # name: rare_and_loanwords_dutch_lexicon__dev__v0
111 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-ie/fewshot/product_ie_one_shot_samples.jsonl
112 # name: product_information_extraction_one_shot__dev__v0
113 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-ie/zeroshot/product_ie_zero_shot_samples.jsonl
114 # name: product_information_extraction_zero_shot__dev__v0
115 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/sort_numeric/samples.jsonl
116 # name: sort_numbers__s1__simple_v0
117 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-matching/zeroshot/samples.jsonl
118 # name: match_product_matching_zeroshot__dev__v1
119 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-matching/fewshot/samples.jsonl
120 # name: match_product_matching_fewshot__dev__v1
121 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/product-matching/rules/samples.jsonl
122 # name: match_product_matching_rules__dev__v1
123 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian-lexicon/samples.jsonl
124 # name: russian_lexicon__dev__v0
125 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/dutch-lexicon/samples.jsonl
126 # name: dutch_lexicon__dev__v0
127 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/greek_nt_manuscripts/codes-sigla-centuries.jsonl
128 # name: greek_nt_manuscripts__v0
129 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/matrix_mult_rows/samples.jsonl
130 # name: matrix_mult_rows__dev__v0
131 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/moral_exceptQA/samples.jsonl
132 # name: moral_exceptqa__test__v1
133 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/music-theory/triads-samples.jsonl
134 # name: music_theory_triads_identification__dev__v0
135 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/music-theory/tetrads-samples.jsonl
136 # name: music_theory_tetrads_identification__dev__v0
137 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/find-thirukkural/samples.jsonl
138 # name: find_thirukkural__dev__v0
139 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/building_floorplan/samples.jsonl
140 # name: building_floorplan__test__v1
141 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese-national-medical-exam01/japanese-national-medical-exam01.jsonl
142 # name: japanese_national_medical_exam01__dev__v0
143 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/lat_long_identify/samples.jsonl
144 # name: lat_long_identify__dev__v0
145 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/norwegian-lexicon/samples.jsonl
146 # name: norwegian_lexicon__dev__v0
147 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/german-part-of-speech/samples.jsonl
148 # name: german_part_of_speech__dev__v0
149 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/swedish_sat/samples.jsonl
150 # name: swedish_sat__dev__v0
151 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/utility_price_parsing/samples.jsonl
152 # name: utility_price_parsing__dev__v0
153 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean-consonant-vowel-combination/samples.jsonl
154 # name: korean_consonant_vowel_combination__dev__v0
155 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/mate-in-one/samples.jsonl
156 # name: mate_in_one__dev__v0
157 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/french-lexicon/samples.jsonl
158 # name: french_lexicon__dev__v0
159 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/swedish-spelling/samples.jsonl
160 # name: swedish_spelling__dev__v0
161 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/knot-theory/knot-theory-unknotting-numbers.jsonl
162 # name: knot_theory_unknotting_number__dev__v0
163 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/knot-theory/knot-theory-unknotting-problems.jsonl
164 # name: knot_theory_unknotting_problem__dev__v0
165 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/knot-theory/knot-theory-code-conversions.jsonl
166 # name: knot_theory_code_conversion__dev__v0
167 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hindi_words/samples.jsonl
168 # name: hindi_words__dev__v0
169 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/arithmetical_puzzles/arithmetical_puzzles.jsonl
170 # name: arithmetical_puzzles__dev__v0
171 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_antonyms/samples.jsonl
172 # name: belarusian_antonyms__dev__v0
173 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/body_movement/body_movement.jsonl
174 # name: body_movement__dev__zero_shot_v0
175 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/afrikaans-lexicon/samples.jsonl
176 # name: afrikaans_lexicon__dev__v0
177 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cricket_situations/samples.jsonl
178 # name: cricket_situations__dev__v0
179 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean_spelling/samples.jsonl
180 # name: korean_spelling__dev__v0
181 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rucola/samples.jsonl
182 # name: rucola__test__v0
183 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logiqa-logical-reasoning-plus/reclor-logical-reasoning-plus.jsonl
184 # name: reclor_logical_reasoning_plus__dev__v0
185 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logiqa-logical-reasoning-plus/logiqav2-logical-reasoning-plus.jsonl
186 # name: logiqav2_logical_reasoning_plus__dev__v0
187 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logiqa-logical-reasoning-plus/logiqa-logical-reasoning-plus.jsonl
188 # name: logiqa_logical_reasoning_plus__dev__v0
189 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/medmcqa/samples.jsonl
190 # name: medmcqa__dev__v0
191 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/multi-step-equations/samples.jsonl
192 # name: multi_step_equations__dev__v0
193 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/islands/japanese_remote_island_to_prefecture.jsonl
194 # name: japanese_remote_island_to_prefecture__dev__v0
195 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_homonym/samples.jsonl
196 # name: chinese_homonym__dev__v0
197 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/norwegian-rhymes/samples.jsonl
198 # name: norwegian_rhymes__dev__v0
199 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_shi_jing/samples.jsonl
200 # name: chinese_shi_jing__test__v1
201 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/forth_stack_sim/basic_samples.jsonl
202 # name: forth_stack_sim_basic__dev__v0
203 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/forth_stack_sim/samples.jsonl
204 # name: forth_stack_sim__dev__v0
205 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/forth_stack_sim/detailed_samples.jsonl
206 # name: forth_stack_sim_detailed__dev__v0
207 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese_city_name_pronunciation/samples.jsonl
208 # name: japanese_city_name_pronunciation__dev__v0
209 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/escher_sentences/samples.jsonl
210 # name: escher_sentences__dev__v0
211 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/track_objects/samples.jsonl
212 # name: track_objects__dev__v0
213 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/shopping_discount_comparison/samples.jsonl
214 # name: shopping_discount_comparison__dev__v0
215 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/test_comp_sci/questions.jsonl
216 # name: computer_science_problems__s1__simple_v0
217 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/mendelian_inheritance/samples.jsonl
218 # name: mendelian_inheritance__dev__v0
219 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/override-system-instruction/samples.jsonl
220 # name: override_system_instruction__dev__v0
221 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/poker_hand_ranks/full_samples.jsonl
222 # name: hand_ranks__test__v1
223 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/diabetes/samples.jsonl
224 # name: diabetes__dev__v0
225 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/job_listing_title_for_a_caregiver_in_japan/samples.jsonl
226 # name: job_listing_title_for_a_caregiver_in_japan__test__v1
227 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/poker_analysis/samples.jsonl
228 # name: poker_analysis__test__v1
229 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_numerals/samples.jsonl
230 # name: belarusian_numerals__dev__v0
231 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/algebra_word_problems/samples.jsonl
232 # name: algebra_word_problems__s1__simple_v0
233 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_grammar/samples.jsonl
234 # name: belarusian_grammar__dev__v0
235 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/svg_understanding/samples.jsonl
236 # name: svg_understanding__v0
237 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cissp-study-questions/samples.jsonl
238 # name: cissp_study_questions__test__v1
239 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/linear_equations/samples.jsonl
240 # name: linear_equations__dev__v0
241 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese_driving_license/samples.jsonl
242 # name: japanese_driving_license__s1__simple_v0
243 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/first-letters/samples.jsonl
244 # name: first_letters__dev__v0
245 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/arc/samples.jsonl
246 # name: arc__dev__v0
247 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/css-selectors/verbal.jsonl
248 # name: css_selectors_verbal__dev__v0
249 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese-itpassport-exam01/japanese-itpassport-exam01.jsonl
250 # name: japanese_itpassport_exam01__dev__v0
251 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/logiqa/logiqa.jsonl
252 # name: logiqa__dev__v0
253 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_zodiac/samples.jsonl
254 # name: chinese_zodiac__dev__v0
255 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/spanish-lexicon/samples.jsonl
256 # name: spanish_lexicon__dev__v0
257 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/food/samples.jsonl
258 # name: food__test__v1
259 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/countries/samples.jsonl
260 # name: countries__dev__v0
261 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/which_is_heavier/which_is_heavier.jsonl
262 # name: which_is_heavier__dev__v0
263 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean_date_counting/samples.jsonl
264 # name: korean_date_counting__dev__v0
265 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/fcc_amateur_extra/samples.jsonl
266 # name: fcc_amateur_extra__dev__v0
267 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/multistep-word-problems/samples.jsonl
268 # name: multistep_word_problems__dev__v0
269 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/list_comparison_missing_name/samples.jsonl
270 # name: list_comparison_missing_name__dev__v0
271 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/newsology/samples.jsonl
272 # name: newsology__dev__v0
273 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/simple-visual-understanding/simple-visual-understanding.jsonl
274 # name: simple_visual_understanding__dev__v0
275 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/portuguese-syllable-count/samples.jsonl
276 # name: portuguese_syllable_count__dev__v0
277 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/south-african-bands/south-african-bands.jsonl
278 # name: south_african_bands__dev__v0
279 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hebrew_plurals/samples.jsonl
280 # name: hebrew_plurals__dev__v0
281 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rot13/rot13.jsonl
282 # name: rot13__s1__simple_v0
283 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean_dialects/samples.jsonl
284 # name: korean_dialects__dev__v0
285 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/test_time_zone_conversion/samples.v0.jsonl
286 # name: test_time_zone_conversion__dev__v0
287 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/music_theory/music_theory_chord_notes.jsonl
288 # name: music_theory_chord_notes__dev__v0
289 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian-english-homonym-context-resolution/samples.jsonl
290 # name: russian_english_homonym_context_resolution__dev__v0
291 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/number_reading/number_reading.jsonl
292 # name: number_reading__dev__v0
293 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/simple-knowledge-mongolian/samples.v0.jsonl
294 # name: simple_knowledge_mongolian__dev__v0
295 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/base64_decode/base64_decode.jsonl
296 # name: base64_decode_simple__dev__v0
297 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/urdu-transliteration/samples.jsonl
298 # name: urdu_transliteration__dev__v0
299 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/reverse-polish-notation/questions.jsonl
300 # name: reverse_polish_notation__dev__v0
301 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/music_theory/music_theory_chord_names.jsonl
302 # name: music_theory_chord_names__dev__v0
303 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/born_first/born_first.jsonl
304 # name: born_first__dev__v0
305 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/tetris/tetris.jsonl
306 # name: tetris__dev__v0
307 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/pure_korean/samples.jsonl
308 # name: pure_korean__dev__v0
309 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/determinant/samples.jsonl
310 # name: determinant__test__v1
311 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/split_chinese_characters/samples.jsonl
312 # name: split_chinese_characters__dev__v0
313 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/syntax-check/samples.jsonl
314 # name: syntax_check__dev__v1
315 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/balance_chemical_equation/samples.jsonl
316 # name: balance_chemical_equation__dev__v0
317 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/emotional-intelligence/samples.jsonl
318 # name: emotional_intelligence__dev__v0
319 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/nutrition/facts.jsonl
320 # name: nutrition__dev__v0
321 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/reverse-sort-words-eng/samples.jsonl
322 # name: reverse_sort_words_eng_simple__dev__v0
323 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/day-of-week-from-date/samples.jsonl
324 # name: day_of_week_from_date__dev__v0
325 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/regex-match/samples.jsonl
326 # name: regex__match__dev__v0
327 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/find-letter/samples.jsonl
328 # name: find_letter__dev__v0
329 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean_foreign_words/samples.jsonl
330 # name: korean_foreign_words__dev__v0
331 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/greek_vocabulary/samples.jsonl
332 # name: greek_vocabulary__dev__v0
333 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rubiks-colors/samples.jsonl
334 # name: rubiks_colors__dev__v0
335 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/decrypt_caesar_cipher/samples.jsonl
336 # name: decrypt_caesar_cipher__dev__v0
337 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/us_tort_law/samples.jsonl
338 # name: us_tort_law__dev__v0
339 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/number_pattern/samples.jsonl
340 # name: number_pattern__dev__v0
341 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/confusing_korean/samples.jsonl
342 # name: confusing_korean__dev__v0
343 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/kanji-idioms/samples.jsonl
344 # name: kanji_idioms__test__v0
345 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/missing_operators/samples.jsonl
346 # name: missing_operators__s1__simple_v0
347 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/unsolvable_questions/samples.jsonl
348 # name: unsolvable_questions__dev__v0
349 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/portuguese-sarcasm/samples.jsonl
350 # name: portuguese_sarcasm__dev__v0
351 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/swap-words/samples.jsonl
352 # name: swap_words__dev__v0
353 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hebrew_same_noun_gender/samples.jsonl
354 # name: hebrew_same_noun_gender__v0
355 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/heart-disease/samples.jsonl
356 # name: heart_disease__v0
357 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/last_word_nth/samples.jsonl
358 # name: last_word_nth__s1__simple_v0
359 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/ascii_wordart/ascii_wordart.jsonl
360 # name: ascii_wordart__dev__v0
361 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/direct-speech-tag/samples.jsonl
362 # name: direct_speech_tag__dev__v0
363 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/italian-new-words/samples.jsonl
364 # name: italian_new_words__dev__v0
365 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/irony/samples.jsonl
366 # name: irony__dev__v0
367 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/math_polish/samples.jsonl
368 # name: math_polish__dev__v0
369 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/irish-lexicon/samples.jsonl
370 # name: irish_lexicon__dev__v0
371 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/canto_wu_pronunciation/samples_zero.jsonl
372 # name: canto_wu_pronunciation__dev__v0
373 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/irrelevant-negative-diversion/irrelevant-negative-diversion.jsonl
374 # name: irrelevant_negative_diversion__dev__v0
375 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/invert_word_wise/invert.jsonl
376 # name: invert_word_wise__dev__v0
377 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/imperial_date_to_string/samples.jsonl
378 # name: imperial_date_to_string__dev__v0
379 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/gujarati_numerals/samples.jsonl
380 # name: gujarati_numerals__dev__v0
381 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/count_token_freq_dna/samples.jsonl
382 # name: count_token_freq_dna__dev__v0
383 # - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/french_homonym_and_homograph/samples.jsonl
384 # name: french_homonym_and_homograph__dev__v0
385 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cube-pack/samples.jsonl
386 name: cube_pack__dev__v0
387 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/historical-kana-orthography-reading/samples.jsonl
388 name: historical_kana_orthography_reading__dev__v0
389 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/canto_wu_pronunciation/samples_few.jsonl
390 name: canto_wu_pronunciation_fewshot__dev__v0
391 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/accounting_audit/samples.jsonl
392 name: accounting_audit__dev__v0
393 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/brazilian-lexicon/samples.jsonl
394 name: brazilian_lexicon__dev__v0
395 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/naughty_strings/samples.jsonl
396 name: naughty_strings__test__v1
397 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean-phonetics/samples.jsonl
398 name: korean_phonetics__dev__v0
399 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_homophonic/chinese_homophonic.jsonl
400 name: chinese_homophonic__dev__v0
401 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/count_intersections_polynomial/samples.jsonl
402 name: count_intersections_polynomial__dev__v0
403 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/coqa/match.jsonl
404 name: coqa_match__dev__v0
405 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/latin_grammar/samples.jsonl
406 name: latin_grammar__dev__v0
407 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/bitwise/samples.jsonl
408 name: bitwise__dev__v0
409 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/shared_border/samples.jsonl
410 name: shared_borders__dev__v0
411 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese-station/samples.jsonl
412 name: japanese_station__dev__v0
413 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/atpl_exams/samples.jsonl
414 name: atpl_exams__dev__v0
415 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/invoice_due_date_leap_day_adjustment/samples.jsonl
416 name: invoice_due_date_leap_day_adjustment__dev__v0
417 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/romanian_homonyms/samples.jsonl
418 name: romanian_homonyms__dev__v0
419 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/infiniteloop-match/infiniteloop-match.jsonl
420 name: infiniteloop_match__s1__simple_v0
421 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian-nlp-tasks/samples.jsonl
422 name: russian_nlp_tasks__dev__v0
423 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_chu_ci/samples.jsonl
424 name: chinese_chu_ci__dev__v0
425 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/polish-syllable-count/samples.jsonl
426 name: polish_syllable_count__val__v0
427 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean-postposition/samples.jsonl
428 name: korean_postposition__dev__v0
429 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/bulgarian-lexicon/samples.jsonl
430 name: bulgarian_lexicon__dev__v0
431 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/compare-countries-area/samples.jsonl
432 name: compare_countries_area__dev__v0
433 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/pattern_identification/samples.v0.jsonl
434 name: pattern_identification__dev__v0
435 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_synonyms/samples.jsonl
436 name: belarusian_synonyms__dev__v0
437 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/spanish_feminine_noun_masculine_article/samples.jsonl
438 name: spanish_feminine_noun_masculine_article__dev__v0
439 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/sarcasm/samples.jsonl
440 name: sarcasm__test__v1
441 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_tang_poetries/sample.jsonl
442 name: chinese_tang_poetries__dev__match_v1
443 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese_number_reading/japanese_number_reading.jsonl
444 name: japanese_number_reading__dev__v0
445 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/korean-honorific/samples.jsonl
446 name: korean_honorific__dev__v0
447 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/complex_replace_characters/samples.jsonl
448 name: complex_replace_characters__dev__v0
449 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/dice-rotation-sequence/samples.jsonl
450 name: dice_rotation_sequence__dev__v0
451 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/utah_real_estate/samples.jsonl
452 name: utah_real_estate__dev__v0
453 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/formal_logic/formal_logic_expressions.jsonl
454 name: formal_logic__dev__v0
455 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/resistor_ohm_calculator/samples.jsonl
456 name: resistor_ohm_calculator__dev__simple_v0
457 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/GOL/samples.jsonl
458 name: gol__dev__v1
459 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/icelandic-sentences-gec/samples.jsonl
460 name: icelandic_sentences_gec__dev__v0
461 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_modern_poem_identification/samples.jsonl
462 name: chinese_modern_poem_identification__test__v1
463 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/reverse_string/reverse_string.jsonl
464 name: reverse_string__s1__simple_v0
465 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/complex-analogies-en-ru/samples.jsonl
466 name: complex_analogies_en_ru__dev__v0
467 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/positive-binary-operations/samples.jsonl
468 name: positive_binary_operations__test__v1
469 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hindi_shuddha/samples.jsonl
470 name: hindi_shuddha__dev__v0
471 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/tokyo-station-number/samples.jsonl
472 name: tokyo_station_number__dev__v0
473 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_famous_novel/samples.jsonl
474 name: chinese_famous_novel__dev__v0
475 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/diagrammatic_logic/samples.jsonl
476 name: diagrammatic_logic__dev__v2
477 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/polish-lexicon/samples.jsonl
478 name: polish_lexicon__dev__v0
479 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/wkt_understanding/samples.jsonl
480 name: wkt_understanding__dev__v0
481 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese-national-medical-exam02/japanese-national-medical-exam02.jsonl
482 name: japanese_national_medical_exam02__dev__v0
483 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cardinal-directions/samples.jsonl
484 name: cardinal_directions__dev__v0
485 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/rectangles/samples.jsonl
486 name: rectangles__dev__v0
487 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/hindi_upsc/samples.jsonl
488 name: hindi_upsc__dev__v0
489 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/three-pt-mapping/three_pt_mapping.jsonl
490 name: three_pt_mapping__dev__v0
491 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/polish-proverbs/samples.jsonl
492 name: polish_proverbs__dev__v0
493 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/indonesian_numbers/indonesian_numbers.jsonl
494 name: indonesian_numbers__dev__v0
495 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_song_ci/samples.jsonl
496 name: chinese_song_ci__dev__v0
497 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/cybersecurity/filepaths.jsonl
498 name: cybersecurity_filepaths__dev__v0
499 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/taxes/samples.jsonl
500 name: taxes__dev__v0
501 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/crontab/samples.jsonl
502 name: crontab__dev__v0
503 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/integer-sequence-predictions/misc-and-recent-sequences.jsonl
504 name: integer_sequence_predictions_misc__dev__v0
505 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/integer-sequence-predictions/obscure-sequences.jsonl
506 name: integer_sequence_predictions_obscure__dev__v0
507 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/integer-sequence-predictions/notable-sequences.jsonl
508 name: integer_sequence_predictions_notable__dev__v0
509 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/integer-sequence-predictions/samples.jsonl
510 name: integer_sequence_predictions__dev__v0
511 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_orthography/samples.jsonl
512 name: belarusian_orthography__dev__v0
513 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/date-booking/samples.jsonl
514 name: date_booking__dev__v0
515 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/interlingual-homograph/samples.jsonl
516 name: interlingual_homograph__dev__v0
517 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stats-tests/samples.jsonl
518 name: stats_tests__dev__v0
519 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_russian_translation/samples.jsonl
520 name: belarusian_russian_translation__dev__v0
521 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/date-calculator/samples.jsonl
522 name: date_calculator__test__v1
523 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/chinese_poem/samples.jsonl
524 name: chinese_poem__dev__v0
525 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/belarusian_lexicon/samples.jsonl
526 name: belarusian_lexicon__dev__v0
527 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/test_english_pronunciations/samples.jsonl
528 name: test_english_pronunciations__dev__v0
529 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/anagrams/samples.jsonl
530 name: anagrams__test__v1
531 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/guess_the_singer/samples.jsonl
532 name: guess_the_singer__dev__v0
533 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/illinois-law/samples.jsonl
534 name: illinois_law__v0
535 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/russian_medical/samples.jsonl
536 name: russian_medical__dev__v0
537 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/bigrams/samples.jsonl
538 name: bigrams__dev__v0
539 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/probability_questions/probability_questions.jsonl
540 name: probability_questions__dev__v0
541 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/vintage_phone_keyboard_decode/samples.jsonl
542 name: vintage_phone_keyboard_decode__dev__v0
543 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/connect4/samples.jsonl
544 name: connect4__s1__v1
545 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_bull_call_spread.jsonl
546 name: stock_options_bull_call_spread__dev__v0
547 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_bear_call_spread.jsonl
548 name: stock_options_bear_call_spread__dev__v0
549 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_bear_call_spread.jsonl
550 name: stock_option_terms_bear_call_spread__dev__v0
551 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_iron_butterfly_spread.jsonl
552 name: stock_option_terms_iron_butterfly_spread__dev__v0
553 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_bull_call_spread.jsonl
554 name: stock_option_terms_bull_call_spread__dev__v0
555 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_inverse_iron_condor_spread.jsonl
556 name: stock_options_inverse_iron_condor_spread__dev__v0
557 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_iron_condor_spread.jsonl
558 name: stock_options_iron_condor_spread__dev__v0
559 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_iron_condor_spread.jsonl
560 name: stock_option_terms_iron_condor_spread__dev__v0
561 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_options_inverse_iron_butterfly_spread.jsonl
562 name: stock_options_inverse_iron_butterfly_spread__dev__v0
563 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/stock_options/stock_option_terms_inverse_iron_condor_spread.jsonl
564 name: stock_option_terms_inverse_iron_condor_spread__dev__v0
565 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/japanese_romantic_context/samples.jsonl
566 name: japanese_romantic_context__dev__v0
567 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/phonetics-identify-words-needing-missing-gpcs/samples.jsonl
568 name: phonetics_identify_words_needing_missing_gpcs__s1__simple_v0
569 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/prompt-injection/samples.jsonl
570 name: prompt_injection__dev__v0
571 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/word_vector_over_reliance/word_vector_over_reliance_samples.jsonl
572 name: word_vector_over_reliance__dev__simple_v0
573 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/lunar_calendar/iso_to_lunar_calendar.jsonl
574 name: iso_to_lunar_calendar__dev__v0
575 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/lunar_calendar/lunar_calendar_to_iso.jsonl
576 name: lunar_calendar_to_iso__dev__v0
577 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/code_combination/samples.jsonl
578 name: code_combination__dev__v0
579 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/partially_solved_crossword_clues/samples.jsonl
580 name: partially_solved_crossword_clues__dev__v0
581 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/quartz/samples.jsonl
582 name: quartz__test__v1
583 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/physics-interaction/samples.jsonl
584 name: physics__interaction__dev__v0
585 - from: https://github.com/Jeadie/evals/raw/refs/heads/main/evals/registry/data/next-val-series/next-val-series.jsonl
586 name: next_val_series__dev__simple_v0
587evals:
588 # - name: actors-sequence
589 # dataset: actors_sequence__dev__match_v1
590 # scorers:
591 # - match
592 # - name: adultery_state_laws
593 # dataset: adultery_state_laws__dev__v0
594 # scorers:
595 # - match
596 # - name: proofreader
597 # dataset: proofreader__dev__v0
598 # scorers:
599 # - match
600 # - name: rock-climbing
601 # dataset: rock_climbing__dev__v0
602 # scorers:
603 # - match
604 # - name: match_banking77
605 # dataset: match_banking77__test__v1
606 # scorers:
607 # - match
608 # - name: ukraine-gec-grammar-prep
609 # dataset: ukraine_gec_grammar_prep__dev__v0
610 # scorers:
611 # - match
612 # - name: ukraine-gec-grammar-case
613 # dataset: ukraine_gec_grammar_case__dev__v0
614 # scorers:
615 # - match
616 # - name: ukraine-gec-grammar-gender
617 # dataset: ukraine_gec_grammar_gender__dev__v0
618 # scorers:
619 # - match
620 # - name: ukraine-gec-grammar-partvoice
621 # dataset: ukraine_gec_grammar_partvoice__dev__v0
622 # scorers:
623 # - match
624 # - name: ukraine-gec-fluency-poorflow
625 # dataset: ukraine_gec_fluency_poorflow__dev__v0
626 # scorers:
627 # - match
628 # - name: ukraine-gec-grammar-verbvoice
629 # dataset: ukraine_gec_grammar_verbvoice__dev__v0
630 # scorers:
631 # - match
632 # - name: ukraine-gec-grammar-number
633 # dataset: ukraine_gec_grammar_number__dev__v0
634 # scorers:
635 # - match
636 # - name: ukraine-gec-fluency-repetition
637 # dataset: ukraine_gec_fluency_repetition__dev__v0
638 # scorers:
639 # - match
640 # - name: ukraine-gec-fluency-calque
641 # dataset: ukraine_gec_fluency_calque__dev__v0
642 # scorers:
643 # - match
644 # - name: ukraine-gec-grammar-verbaform
645 # dataset: ukraine_gec_grammar_verbaform__dev__v0
646 # scorers:
647 # - match
648 # - name: ukraine-gec-grammar-ungrammaticalstructure
649 # dataset: ukraine_gec_grammar_ungrammaticalstructure__dev__v0
650 # scorers:
651 # - match
652 # - name: ukraine-gec-grammar-other
653 # dataset: ukraine_gec_grammar_other__dev__v0
654 # scorers:
655 # - match
656 # - name: ukraine-gec-fluency-style
657 # dataset: ukraine_gec_fluency_style__dev__v0
658 # scorers:
659 # - match
660 # - name: ukraine-gec-fluency-other
661 # dataset: ukraine_gec_fluency_other__dev__v0
662 # scorers:
663 # - match
664 # - name: ukraine-gec-grammar-conjunction
665 # dataset: ukraine_gec_grammar_conjunction__dev__v0
666 # scorers:
667 # - match
668 # - name: ukraine-gec-grammar-comparison
669 # dataset: ukraine_gec_grammar_comparison__dev__v0
670 # scorers:
671 # - match
672 # - name: ukraine-gec-grammar-tense
673 # dataset: ukraine_gec_grammar_tense__dev__v0
674 # scorers:
675 # - match
676 # - name: ukraine-gec-grammar-aspect
677 # dataset: ukraine_gec_grammar_aspect__dev__v0
678 # scorers:
679 # - match
680 # - name: irish-plural-nouns
681 # dataset: irish_plural_nouns__dev__v0
682 # scorers:
683 # - match
684 # - name: shape-in-shape
685 # dataset: shape_in_shape__dev__v1
686 # scorers:
687 # - match
688 # - name: russian_sarcasm
689 # dataset: russian_sarcasm__dev__v0
690 # scorers:
691 # - match
692 # - name: syllables_long_words
693 # dataset: syllables__dev__v1
694 # scorers:
695 # - match
696 # - name: crepe
697 # dataset: crepe__dev__v2
698 # scorers:
699 # - match
700 # - name: coq-proof-step-match
701 # dataset: coq_proof_step_match__dev__v0
702 # scorers:
703 # - match
704 # - name: ukraine-eit
705 # dataset: ukraine_eit__val__v0
706 # scorers:
707 # - match
708 # - name: belarusian-proverbs
709 # dataset: belarusian_proverbs__dev__v0
710 # scorers:
711 # - match
712 # - name: invoices
713 # dataset: invoices__dev__v0
714 # scorers:
715 # - match
716 # - name: urdu-lexicon
717 # dataset: urdu_lexicon__dev__v0
718 # scorers:
719 # - match
720 # - name: qa
721 # dataset: qa__dev__v0
722 # scorers:
723 # - match
724 # - name: french-part-of-speech
725 # dataset: french_part_of_speech__dev__v0
726 # scorers:
727 # - match
728 # - name: internal_representations
729 # dataset: internal_representations__dev__v0
730 # scorers:
731 # - match
732 # - name: python_list_comprehension
733 # dataset: python_list_comprehension__dev__v0
734 # scorers:
735 # - match
736 # - name: nepali-numerals
737 # dataset: nepali_numerals__dev__v0
738 # scorers:
739 # - match
740 # - name: belarusian-syllable-count
741 # dataset: belarusian_syllable_count__dev__v0
742 # scorers:
743 # - match
744 # - name: mandaliof-table
745 # dataset: mandaliof_table__dev__v0
746 # scorers:
747 # - match
748 # - name: test_japanese_english_numerals
749 # dataset: test_japanese_english_numerals__dev__v0
750 # scorers:
751 # - match
752 # - name: tracking-shuffled-objects
753 # dataset: tracking_shuffled_objects__dev__v0
754 # scorers:
755 # - match
756 # - name: squares-gpt
757 # dataset: squares_gpt__dev__v0
758 # scorers:
759 # - match
760 # - name: convert-hex-hsl-lightness
761 # dataset: convert_hex_hsl_lightness__dev__v0
762 # scorers:
763 # - match
764 # - name: russe
765 # dataset: russe__test__v0
766 # scorers:
767 # - match
768 # - name: aba_mrpc_true_false
769 # dataset: aba_mrpc_true_false__dev__v0
770 # scorers:
771 # - match
772 # - name: logical_counting
773 # dataset: logical_counting__dev__v0
774 # scorers:
775 # - match
776 # - name: vigenere
777 # dataset: vigenere__s1__simple_v0
778 # scorers:
779 # - match
780 # - name: map-electronic-component-part-to-fact
781 # dataset: map_electronic_component_part_to_fact__dev__v0
782 # scorers:
783 # - match
784 # - name: rare-and-loanwords-dutch-lexicon
785 # dataset: rare_and_loanwords_dutch_lexicon__dev__v0
786 # scorers:
787 # - match
788 # - name: product_information_extraction_one_shot
789 # dataset: product_information_extraction_one_shot__dev__v0
790 # scorers:
791 # - match
792 # - name: product_information_extraction_zero_shot
793 # dataset: product_information_extraction_zero_shot__dev__v0
794 # scorers:
795 # - match
796 # - name: sort-numbers
797 # dataset: sort_numbers__s1__simple_v0
798 # scorers:
799 # - match
800 # - name: match_product-matching_zeroshot
801 # dataset: match_product_matching_zeroshot__dev__v1
802 # scorers:
803 # - match
804 # - name: match_product-matching_fewshot
805 # dataset: match_product_matching_fewshot__dev__v1
806 # scorers:
807 # - match
808 # - name: match_product-matching_rules
809 # dataset: match_product_matching_rules__dev__v1
810 # scorers:
811 # - match
812 # - name: russian-lexicon
813 # dataset: russian_lexicon__dev__v0
814 # scorers:
815 # - match
816 # - name: dutch-lexicon
817 # dataset: dutch_lexicon__dev__v0
818 # scorers:
819 # - match
820 # - name: greek-nt-manuscripts
821 # dataset: greek_nt_manuscripts__v0
822 # scorers:
823 # - match
824 # - name: matrix_mult_rows
825 # dataset: matrix_mult_rows__dev__v0
826 # scorers:
827 # - match
828 # - name: moral_exceptQA
829 # dataset: moral_exceptqa__test__v1
830 # scorers:
831 # - match
832 # - name: music-theory-triads-identification
833 # dataset: music_theory_triads_identification__dev__v0
834 # scorers:
835 # - match
836 # - name: music-theory-tetrads-identification
837 # dataset: music_theory_tetrads_identification__dev__v0
838 # scorers:
839 # - match
840 # - name: find-thirukkural
841 # dataset: find_thirukkural__dev__v0
842 # scorers:
843 # - match
844 # - name: building_floorplan
845 # dataset: building_floorplan__test__v1
846 # scorers:
847 # - match
848 # - name: japanese-national-medical-exam01
849 # dataset: japanese_national_medical_exam01__dev__v0
850 # scorers:
851 # - match
852 # - name: lat_long_identify
853 # dataset: lat_long_identify__dev__v0
854 # scorers:
855 # - match
856 # - name: norwegian-lexicon
857 # dataset: norwegian_lexicon__dev__v0
858 # scorers:
859 # - match
860 # - name: german-part-of-speech
861 # dataset: german_part_of_speech__dev__v0
862 # scorers:
863 # - match
864 # - name: swedish_sat
865 # dataset: swedish_sat__dev__v0
866 # scorers:
867 # - match
868 # - name: utility_price_parsing
869 # dataset: utility_price_parsing__dev__v0
870 # scorers:
871 # - match
872 # - name: korean-consonant-vowel-combination
873 # dataset: korean_consonant_vowel_combination__dev__v0
874 # scorers:
875 # - match
876 # - name: mate-in-one
877 # dataset: mate_in_one__dev__v0
878 # scorers:
879 # - match
880 # - name: french-lexicon
881 # dataset: french_lexicon__dev__v0
882 # scorers:
883 # - match
884 # - name: swedish-spelling
885 # dataset: swedish_spelling__dev__v0
886 # scorers:
887 # - match
888 # - name: knot-theory-unknotting-number
889 # dataset: knot_theory_unknotting_number__dev__v0
890 # scorers:
891 # - match
892 # - name: knot-theory-unknotting-problem
893 # dataset: knot_theory_unknotting_problem__dev__v0
894 # scorers:
895 # - match
896 # - name: knot-theory-code-conversion
897 # dataset: knot_theory_code_conversion__dev__v0
898 # scorers:
899 # - match
900 # - name: hindi_words
901 # dataset: hindi_words__dev__v0
902 # scorers:
903 # - match
904 # - name: arithmetical_puzzles
905 # dataset: arithmetical_puzzles__dev__v0
906 # scorers:
907 # - match
908 # - name: belarusian-antonyms
909 # dataset: belarusian_antonyms__dev__v0
910 # scorers:
911 # - match
912 # - name: body-movement
913 # dataset: body_movement__dev__zero_shot_v0
914 # scorers:
915 # - match
916 # - name: afrikaans-lexicon
917 # dataset: afrikaans_lexicon__dev__v0
918 # scorers:
919 # - match
920 # - name: cricket_situations
921 # dataset: cricket_situations__dev__v0
922 # scorers:
923 # - match
924 # - name: korean_spelling
925 # dataset: korean_spelling__dev__v0
926 # scorers:
927 # - match
928 # - name: rucola
929 # dataset: rucola__test__v0
930 # scorers:
931 # - match
932 # - name: reclor-logical-reasoning-plus
933 # dataset: reclor_logical_reasoning_plus__dev__v0
934 # scorers:
935 # - match
936 # - name: logiqav2-logical-reasoning-plus
937 # dataset: logiqav2_logical_reasoning_plus__dev__v0
938 # scorers:
939 # - match
940 # - name: logiqa-logical-reasoning-plus
941 # dataset: logiqa_logical_reasoning_plus__dev__v0
942 # scorers:
943 # - match
944 # - name: medmcqa
945 # dataset: medmcqa__dev__v0
946 # scorers:
947 # - match
948 # - name: multi-step-equations
949 # dataset: multi_step_equations__dev__v0
950 # scorers:
951 # - match
952 # - name: japanese-remote-island-to-prefecture
953 # dataset: japanese_remote_island_to_prefecture__dev__v0
954 # scorers:
955 # - match
956 # - name: chinese_homonym
957 # dataset: chinese_homonym__dev__v0
958 # scorers:
959 # - match
960 # - name: norwegian-rhymes
961 # dataset: norwegian_rhymes__dev__v0
962 # scorers:
963 # - match
964 # - name: chinese_shi_jing
965 # dataset: chinese_shi_jing__test__v1
966 # scorers:
967 # - match
968 # - name: forth-stack-sim-basic
969 # dataset: forth_stack_sim_basic__dev__v0
970 # scorers:
971 # - match
972 # - name: forth-stack-sim
973 # dataset: forth_stack_sim__dev__v0
974 # scorers:
975 # - match
976 # - name: forth-stack-sim-detailed
977 # dataset: forth_stack_sim_detailed__dev__v0
978 # scorers:
979 # - match
980 # - name: japanese_city_name_pronunciation
981 # dataset: japanese_city_name_pronunciation__dev__v0
982 # scorers:
983 # - match
984 # - name: escher-sentences
985 # dataset: escher_sentences__dev__v0
986 # scorers:
987 # - match
988 # - name: track_objects
989 # dataset: track_objects__dev__v0
990 # scorers:
991 # - match
992 # - name: shopping_discount_comparison
993 # dataset: shopping_discount_comparison__dev__v0
994 # scorers:
995 # - match
996 # - name: computer-science-problems
997 # dataset: computer_science_problems__s1__simple_v0
998 # scorers:
999 # - match
1000 # - name: mendelian_inheritance
1001 # dataset: mendelian_inheritance__dev__v0
1002 # scorers:
1003 # - match
1004 # - name: override-system-instruction
1005 # dataset: override_system_instruction__dev__v0
1006 # scorers:
1007 # - match
1008 # - name: hand_ranks-match
1009 # dataset: hand_ranks__test__v1
1010 # scorers:
1011 # - match
1012 # - name: diabetes
1013 # dataset: diabetes__dev__v0
1014 # scorers:
1015 # - match
1016 # - name: job_listing_title_for_a_caregiver_in_japan
1017 # dataset: job_listing_title_for_a_caregiver_in_japan__test__v1
1018 # scorers:
1019 # - match
1020 # - name: poker_analysis
1021 # dataset: poker_analysis__test__v1
1022 # scorers:
1023 # - match
1024 # - name: belarusian-numerals
1025 # dataset: belarusian_numerals__dev__v0
1026 # scorers:
1027 # - match
1028 # - name: algebra-word-problems
1029 # dataset: algebra_word_problems__s1__simple_v0
1030 # scorers:
1031 # - match
1032 # - name: belarusian-grammar
1033 # dataset: belarusian_grammar__dev__v0
1034 # scorers:
1035 # - match
1036 # - name: svg_understanding
1037 # dataset: svg_understanding__v0
1038 # scorers:
1039 # - match
1040 # - name: cissp-study-questions
1041 # dataset: cissp_study_questions__test__v1
1042 # scorers:
1043 # - match
1044 # - name: linear-equations
1045 # dataset: linear_equations__dev__v0
1046 # scorers:
1047 # - match
1048 # - name: japanese_driving_license
1049 # dataset: japanese_driving_license__s1__simple_v0
1050 # scorers:
1051 # - match
1052 # - name: first-letters
1053 # dataset: first_letters__dev__v0
1054 # scorers:
1055 # - match
1056 # - name: arc
1057 # dataset: arc__dev__v0
1058 # scorers:
1059 # - match
1060 # - name: css-selectors-verbal
1061 # dataset: css_selectors_verbal__dev__v0
1062 # scorers:
1063 # - match
1064 # - name: japanese-itpassport-exam01
1065 # dataset: japanese_itpassport_exam01__dev__v0
1066 # scorers:
1067 # - match
1068 # - name: logiqa
1069 # dataset: logiqa__dev__v0
1070 # scorers:
1071 # - match
1072 # - name: chinese_zodiac
1073 # dataset: chinese_zodiac__dev__v0
1074 # scorers:
1075 # - match
1076 # - name: spanish-lexicon
1077 # dataset: spanish_lexicon__dev__v0
1078 # scorers:
1079 # - match
1080 # - name: food
1081 # dataset: food__test__v1
1082 # scorers:
1083 # - match
1084 # - name: countries
1085 # dataset: countries__dev__v0
1086 # scorers:
1087 # - match
1088 # - name: which-is-heavier
1089 # dataset: which_is_heavier__dev__v0
1090 # scorers:
1091 # - match
1092 # - name: korean_date_counting
1093 # dataset: korean_date_counting__dev__v0
1094 # scorers:
1095 # - match
1096 # - name: fcc_amateur_extra
1097 # dataset: fcc_amateur_extra__dev__v0
1098 # scorers:
1099 # - match
1100 # - name: multistep-word-problems
1101 # dataset: multistep_word_problems__dev__v0
1102 # scorers:
1103 # - match
1104 # - name: list_comparison_missing_name
1105 # dataset: list_comparison_missing_name__dev__v0
1106 # scorers:
1107 # - match
1108 # - name: newsology
1109 # dataset: newsology__dev__v0
1110 # scorers:
1111 # - match
1112 # - name: simple-visual-understanding
1113 # dataset: simple_visual_understanding__dev__v0
1114 # scorers:
1115 # - match
1116 # - name: portuguese-syllable-count
1117 # dataset: portuguese_syllable_count__dev__v0
1118 # scorers:
1119 # - match
1120 # - name: south-african-bands
1121 # dataset: south_african_bands__dev__v0
1122 # scorers:
1123 # - match
1124 # - name: hebrew-plurals
1125 # dataset: hebrew_plurals__dev__v0
1126 # scorers:
1127 # - match
1128 # - name: rot13
1129 # dataset: rot13__s1__simple_v0
1130 # scorers:
1131 # - match
1132 # - name: korean_dialects
1133 # dataset: korean_dialects__dev__v0
1134 # scorers:
1135 # - match
1136 # - name: test-time-zone-conversion
1137 # dataset: test_time_zone_conversion__dev__v0
1138 # scorers:
1139 # - match
1140 # - name: music-theory-chord-notes
1141 # dataset: music_theory_chord_notes__dev__v0
1142 # scorers:
1143 # - match
1144 # - name: russian-english-homonym-context-resolution
1145 # dataset: russian_english_homonym_context_resolution__dev__v0
1146 # scorers:
1147 # - match
1148 # - name: number-reading
1149 # dataset: number_reading__dev__v0
1150 # scorers:
1151 # - match
1152 # - name: simple-knowledge-mongolian
1153 # dataset: simple_knowledge_mongolian__dev__v0
1154 # scorers:
1155 # - match
1156 # - name: base64-decode
1157 # dataset: base64_decode_simple__dev__v0
1158 # scorers:
1159 # - match
1160 # - name: urdu-transliteration
1161 # dataset: urdu_transliteration__dev__v0
1162 # scorers:
1163 # - match
1164 # - name: reverse-polish-notation
1165 # dataset: reverse_polish_notation__dev__v0
1166 # scorers:
1167 # - match
1168 # - name: music-theory-chord-names
1169 # dataset: music_theory_chord_names__dev__v0
1170 # scorers:
1171 # - match
1172 # - name: born-first
1173 # dataset: born_first__dev__v0
1174 # scorers:
1175 # - match
1176 # - name: tetris
1177 # dataset: tetris__dev__v0
1178 # scorers:
1179 # - match
1180 # - name: pure_korean
1181 # dataset: pure_korean__dev__v0
1182 # scorers:
1183 # - match
1184 # - name: determinant
1185 # dataset: determinant__test__v1
1186 # scorers:
1187 # - match
1188 # - name: split_chinese_characters
1189 # dataset: split_chinese_characters__dev__v0
1190 # scorers:
1191 # - match
1192 # - name: syntax-check
1193 # dataset: syntax_check__dev__v1
1194 # scorers:
1195 # - match
1196 # - name: balance-chemical-equation
1197 # dataset: balance_chemical_equation__dev__v0
1198 # scorers:
1199 # - match
1200 # - name: emotional-intelligence
1201 # dataset: emotional_intelligence__dev__v0
1202 # scorers:
1203 # - match
1204 # - name: nutrition
1205 # dataset: nutrition__dev__v0
1206 # scorers:
1207 # - match
1208 # - name: reverse-sort-words-eng
1209 # dataset: reverse_sort_words_eng_simple__dev__v0
1210 # scorers:
1211 # - match
1212 # - name: day-of-week-from-date
1213 # dataset: day_of_week_from_date__dev__v0
1214 # scorers:
1215 # - match
1216 # - name: regex-match
1217 # dataset: regex__match__dev__v0
1218 # scorers:
1219 # - match
1220 # - name: find-letter
1221 # dataset: find_letter__dev__v0
1222 # scorers:
1223 # - match
1224 # - name: korean_foreign_words
1225 # dataset: korean_foreign_words__dev__v0
1226 # scorers:
1227 # - match
1228 # - name: greek-vocabulary
1229 # dataset: greek_vocabulary__dev__v0
1230 # scorers:
1231 # - match
1232 # - name: rubiks-colors
1233 # dataset: rubiks_colors__dev__v0
1234 # scorers:
1235 # - match
1236 # - name: decrypt-caesar-cipher
1237 # dataset: decrypt_caesar_cipher__dev__v0
1238 # scorers:
1239 # - match
1240 # - name: us-tort-law
1241 # dataset: us_tort_law__dev__v0
1242 # scorers:
1243 # - match
1244 # - name: number-pattern
1245 # dataset: number_pattern__dev__v0
1246 # scorers:
1247 # - match
1248 # - name: confusing_korean
1249 # dataset: confusing_korean__dev__v0
1250 # scorers:
1251 # - match
1252 # - name: kanji-idioms
1253 # dataset: kanji_idioms__test__v0
1254 # scorers:
1255 # - match
1256 # - name: missing-operators
1257 # dataset: missing_operators__s1__simple_v0
1258 # scorers:
1259 # - match
1260 # - name: unsolvable_questions
1261 # dataset: unsolvable_questions__dev__v0
1262 # scorers:
1263 # - match
1264 # - name: portuguese-sarcasm
1265 # dataset: portuguese_sarcasm__dev__v0
1266 # scorers:
1267 # - match
1268 # - name: swap-words
1269 # dataset: swap_words__dev__v0
1270 # scorers:
1271 # - match
1272 # - name: hebrew-same-noun-gender
1273 # dataset: hebrew_same_noun_gender__v0
1274 # scorers:
1275 # - match
1276 # - name: heart-disease
1277 # dataset: heart_disease__v0
1278 # scorers:
1279 # - match
1280 # - name: last-word-nth
1281 # dataset: last_word_nth__s1__simple_v0
1282 # scorers:
1283 # - match
1284 # - name: ascii-wordart
1285 # dataset: ascii_wordart__dev__v0
1286 # scorers:
1287 # - match
1288 # - name: direct-speech-tag
1289 # dataset: direct_speech_tag__dev__v0
1290 # scorers:
1291 # - match
1292 # - name: italian-new-words
1293 # dataset: italian_new_words__dev__v0
1294 # scorers:
1295 # - match
1296 # - name: irony
1297 # dataset: irony__dev__v0
1298 # scorers:
1299 # - match
1300 # - name: math_polish
1301 # dataset: math_polish__dev__v0
1302 # scorers:
1303 # - match
1304 # - name: irish-lexicon
1305 # dataset: irish_lexicon__dev__v0
1306 # scorers:
1307 # - match
1308 # - name: canto_wu_pronunciation
1309 # dataset: canto_wu_pronunciation__dev__v0
1310 # scorers:
1311 # - match
1312 # - name: irrelevant-negative-diversion
1313 # dataset: irrelevant_negative_diversion__dev__v0
1314 # scorers:
1315 # - match
1316 # - name: invert_word_wise
1317 # dataset: invert_word_wise__dev__v0
1318 # scorers:
1319 # - match
1320 # - name: imperial_date_to_string
1321 # dataset: imperial_date_to_string__dev__v0
1322 # scorers:
1323 # - match
1324 # - name: gujarati-numerals
1325 # dataset: gujarati_numerals__dev__v0
1326 # scorers:
1327 # - match
1328 # - name: count_token_freq_dna
1329 # dataset: count_token_freq_dna__dev__v0
1330 # scorers:
1331 # - match
1332 # - name: french_homonym_and_homograph
1333 # dataset: french_homonym_and_homograph__dev__v0
1334 # scorers:
1335 # - match
1336 - name: cube-pack
1337 dataset: cube_pack__dev__v0
1338 scorers:
1339 - match
1340 - name: historical-kana-orthography-reading
1341 dataset: historical_kana_orthography_reading__dev__v0
1342 scorers:
1343 - match
1344 - name: canto_wu_pronunciation_fewshot
1345 dataset: canto_wu_pronunciation_fewshot__dev__v0
1346 scorers:
1347 - match
1348 - name: accounting_audit
1349 dataset: accounting_audit__dev__v0
1350 scorers:
1351 - match
1352 - name: brazilian-lexicon
1353 dataset: brazilian_lexicon__dev__v0
1354 scorers:
1355 - match
1356 - name: naughty_strings
1357 dataset: naughty_strings__test__v1
1358 scorers:
1359 - match
1360 - name: korean-phonetics
1361 dataset: korean_phonetics__dev__v0
1362 scorers:
1363 - match
1364 - name: chinese-homo
1365 dataset: chinese_homophonic__dev__v0
1366 scorers:
1367 - match
1368 - name: count_intersections_polynomial
1369 dataset: count_intersections_polynomial__dev__v0
1370 scorers:
1371 - match
1372 - name: coqa-match
1373 dataset: coqa_match__dev__v0
1374 scorers:
1375 - match
1376 - name: latin-grammar
1377 dataset: latin_grammar__dev__v0
1378 scorers:
1379 - match
1380 - name: bitwise
1381 dataset: bitwise__dev__v0
1382 scorers:
1383 - match
1384 - name: shared-borders
1385 dataset: shared_borders__dev__v0
1386 scorers:
1387 - match
1388 - name: japanese-station
1389 dataset: japanese_station__dev__v0
1390 scorers:
1391 - match
1392 - name: atpl_exams
1393 dataset: atpl_exams__dev__v0
1394 scorers:
1395 - match
1396 - name: invoice_due_date_leap_day_adjustment
1397 dataset: invoice_due_date_leap_day_adjustment__dev__v0
1398 scorers:
1399 - match
1400 - name: romanian_homonyms
1401 dataset: romanian_homonyms__dev__v0
1402 scorers:
1403 - match
1404 - name: infiniteloop-match
1405 dataset: infiniteloop_match__s1__simple_v0
1406 scorers:
1407 - match
1408 - name: russian-nlp-tasks
1409 dataset: russian_nlp_tasks__dev__v0
1410 scorers:
1411 - match
1412 - name: chinese_chu_ci
1413 dataset: chinese_chu_ci__dev__v0
1414 scorers:
1415 - match
1416 - name: polish-syllable-count
1417 dataset: polish_syllable_count__val__v0
1418 scorers:
1419 - match
1420 - name: korean-postposition
1421 dataset: korean_postposition__dev__v0
1422 scorers:
1423 - match
1424 - name: bulgarian-lexicon
1425 dataset: bulgarian_lexicon__dev__v0
1426 scorers:
1427 - match
1428 - name: compare-countries-area
1429 dataset: compare_countries_area__dev__v0
1430 scorers:
1431 - match
1432 - name: pattern_identification
1433 dataset: pattern_identification__dev__v0
1434 scorers:
1435 - match
1436 - name: belarusian-synonyms
1437 dataset: belarusian_synonyms__dev__v0
1438 scorers:
1439 - match
1440 - name: spanish_feminine_noun_masculine_article
1441 dataset: spanish_feminine_noun_masculine_article__dev__v0
1442 scorers:
1443 - match
1444 - name: sarcasm
1445 dataset: sarcasm__test__v1
1446 scorers:
1447 - match
1448 - name: chinese_tang_poetries
1449 dataset: chinese_tang_poetries__dev__match_v1
1450 scorers:
1451 - match
1452 - name: japanese-number-reading
1453 dataset: japanese_number_reading__dev__v0
1454 scorers:
1455 - match
1456 - name: korean-honorific
1457 dataset: korean_honorific__dev__v0
1458 scorers:
1459 - match
1460 - name: complex-replace-characters
1461 dataset: complex_replace_characters__dev__v0
1462 scorers:
1463 - match
1464 - name: dice-rotation-sequence
1465 dataset: dice_rotation_sequence__dev__v0
1466 scorers:
1467 - match
1468 - name: utah_real_estateh
1469 dataset: utah_real_estate__dev__v0
1470 scorers:
1471 - match
1472 - name: formal-logic
1473 dataset: formal_logic__dev__v0
1474 scorers:
1475 - match
1476 - name: resistor-ohm-calculator
1477 dataset: resistor_ohm_calculator__dev__simple_v0
1478 scorers:
1479 - match
1480 - name: gol
1481 dataset: gol__dev__v1
1482 scorers:
1483 - match
1484 - name: icelandic-sentences-gec
1485 dataset: icelandic_sentences_gec__dev__v0
1486 scorers:
1487 - match
1488 - name: chinese_modern_poem_identification
1489 dataset: chinese_modern_poem_identification__test__v1
1490 scorers:
1491 - match
1492 - name: reverse-string
1493 dataset: reverse_string__s1__simple_v0
1494 scorers:
1495 - match
1496 - name: complex-analogies-en-ru
1497 dataset: complex_analogies_en_ru__dev__v0
1498 scorers:
1499 - match
1500 - name: positive-binary-operations
1501 dataset: positive_binary_operations__test__v1
1502 scorers:
1503 - match
1504 - name: hindi_shuddha
1505 dataset: hindi_shuddha__dev__v0
1506 scorers:
1507 - match
1508 - name: tokyo-station-number
1509 dataset: tokyo_station_number__dev__v0
1510 scorers:
1511 - match
1512 - name: chinese_famous_novel
1513 dataset: chinese_famous_novel__dev__v0
1514 scorers:
1515 - match
1516 - name: diagrammatic_logic
1517 dataset: diagrammatic_logic__dev__v2
1518 scorers:
1519 - match
1520 - name: polish-lexicon
1521 dataset: polish_lexicon__dev__v0
1522 scorers:
1523 - match
1524 - name: wkt_understanding
1525 dataset: wkt_understanding__dev__v0
1526 scorers:
1527 - match
1528 - name: japanese-national-medical-exam02
1529 dataset: japanese_national_medical_exam02__dev__v0
1530 scorers:
1531 - match
1532 - name: cardinal-directions
1533 dataset: cardinal_directions__dev__v0
1534 scorers:
1535 - match
1536 - name: rectangles
1537 dataset: rectangles__dev__v0
1538 scorers:
1539 - match
1540 - name: hindi_upsc
1541 dataset: hindi_upsc__dev__v0
1542 scorers:
1543 - match
1544 - name: three-pt-mapping
1545 dataset: three_pt_mapping__dev__v0
1546 scorers:
1547 - match
1548 - name: polish-proverbs
1549 dataset: polish_proverbs__dev__v0
1550 scorers:
1551 - match
1552 - name: indonesian_numbers
1553 dataset: indonesian_numbers__dev__v0
1554 scorers:
1555 - match
1556 - name: chinese_song_ci
1557 dataset: chinese_song_ci__dev__v0
1558 scorers:
1559 - match
1560 - name: cybersecurity-filepaths
1561 dataset: cybersecurity_filepaths__dev__v0
1562 scorers:
1563 - match
1564 - name: taxes
1565 dataset: taxes__dev__v0
1566 scorers:
1567 - match
1568 - name: crontab
1569 dataset: crontab__dev__v0
1570 scorers:
1571 - match
1572 - name: integer-sequence-predictions-misc
1573 dataset: integer_sequence_predictions_misc__dev__v0
1574 scorers:
1575 - match
1576 - name: integer-sequence-predictions-obscure
1577 dataset: integer_sequence_predictions_obscure__dev__v0
1578 scorers:
1579 - match
1580 - name: integer-sequence-predictions-notable
1581 dataset: integer_sequence_predictions_notable__dev__v0
1582 scorers:
1583 - match
1584 - name: integer-sequence-predictions
1585 dataset: integer_sequence_predictions__dev__v0
1586 scorers:
1587 - match
1588 - name: belarusian-orthography
1589 dataset: belarusian_orthography__dev__v0
1590 scorers:
1591 - match
1592 - name: date-booking
1593 dataset: date_booking__dev__v0
1594 scorers:
1595 - match
1596 - name: interlingual-homograph
1597 dataset: interlingual_homograph__dev__v0
1598 scorers:
1599 - match
1600 - name: stats-tests
1601 dataset: stats_tests__dev__v0
1602 scorers:
1603 - match
1604 - name: belarusian-russian-translation
1605 dataset: belarusian_russian_translation__dev__v0
1606 scorers:
1607 - match
1608 - name: date-calculator
1609 dataset: date_calculator__test__v1
1610 scorers:
1611 - match
1612 - name: chinese_poem
1613 dataset: chinese_poem__dev__v0
1614 scorers:
1615 - match
1616 - name: belarusian-lexicon
1617 dataset: belarusian_lexicon__dev__v0
1618 scorers:
1619 - match
1620 - name: test_english_pronunciations
1621 dataset: test_english_pronunciations__dev__v0
1622 scorers:
1623 - match
1624 - name: anagrams
1625 dataset: anagrams__test__v1
1626 scorers:
1627 - match
1628 - name: guess-the-singer
1629 dataset: guess_the_singer__dev__v0
1630 scorers:
1631 - match
1632 - name: illinois-law
1633 dataset: illinois_law__v0
1634 scorers:
1635 - match
1636 - name: russian_medical
1637 dataset: russian_medical__dev__v0
1638 scorers:
1639 - match
1640 - name: bigrams
1641 dataset: bigrams__dev__v0
1642 scorers:
1643 - match
1644 - name: probability-questions
1645 dataset: probability_questions__dev__v0
1646 scorers:
1647 - match
1648 - name: vintage_phone_keyboard_decode
1649 dataset: vintage_phone_keyboard_decode__dev__v0
1650 scorers:
1651 - match
1652 - name: connect4
1653 dataset: connect4__s1__v1
1654 scorers:
1655 - match
1656 - name: stock-options-bull-call-spread
1657 dataset: stock_options_bull_call_spread__dev__v0
1658 scorers:
1659 - match
1660 - name: stock-options-bear-call-spread
1661 dataset: stock_options_bear_call_spread__dev__v0
1662 scorers:
1663 - match
1664 - name: stock-option-terms-bear-call-spread
1665 dataset: stock_option_terms_bear_call_spread__dev__v0
1666 scorers:
1667 - match
1668 - name: stock-option-terms-iron-butteryfly-spread
1669 dataset: stock_option_terms_iron_butterfly_spread__dev__v0
1670 scorers:
1671 - match
1672 - name: stock-option-terms-bull-call-spread
1673 dataset: stock_option_terms_bull_call_spread__dev__v0
1674 scorers:
1675 - match
1676 - name: stock-options-inverse-iron-condor-spread
1677 dataset: stock_options_inverse_iron_condor_spread__dev__v0
1678 scorers:
1679 - match
1680 - name: stock-options-iron-condor-spread
1681 dataset: stock_options_iron_condor_spread__dev__v0
1682 scorers:
1683 - match
1684 - name: stock-option-terms-iron-condor-spread
1685 dataset: stock_option_terms_iron_condor_spread__dev__v0
1686 scorers:
1687 - match
1688 - name: stock-options-inverse-iron-butterfly-spread
1689 dataset: stock_options_inverse_iron_butterfly_spread__dev__v0
1690 scorers:
1691 - match
1692 - name: stock-option-terms-inverse-iron-condor-spread
1693 dataset: stock_option_terms_inverse_iron_condor_spread__dev__v0
1694 scorers:
1695 - match
1696 - name: japanese_romantic_context
1697 dataset: japanese_romantic_context__dev__v0
1698 scorers:
1699 - match
1700 - name: phonetics-identify-words-needing-missing-gpcs
1701 dataset: phonetics_identify_words_needing_missing_gpcs__s1__simple_v0
1702 scorers:
1703 - match
1704 - name: prompt-injection
1705 dataset: prompt_injection__dev__v0
1706 scorers:
1707 - match
1708 - name: word_vector_over_reliance
1709 dataset: word_vector_over_reliance__dev__simple_v0
1710 scorers:
1711 - match
1712 - name: iso-to-lunar-calendar
1713 dataset: iso_to_lunar_calendar__dev__v0
1714 scorers:
1715 - match
1716 - name: lunar-calendar-to-iso
1717 dataset: lunar_calendar_to_iso__dev__v0
1718 scorers:
1719 - match
1720 - name: code_combination
1721 dataset: code_combination__dev__v0
1722 scorers:
1723 - match
1724 - name: partially_solved_crossword_clues
1725 dataset: partially_solved_crossword_clues__dev__v0
1726 scorers:
1727 - match
1728 - name: quartz
1729 dataset: quartz__test__v1
1730 scorers:
1731 - match
1732 - name: physics-interaction
1733 dataset: physics__interaction__dev__v0
1734 scorers:
1735 - match
1736 - name: next-val-series
1737 dataset: next_val_series__dev__simple_v0
1738 scorers:
1739 - match
1740