Example Browser
Sample narratives -- correct, incorrect, and hallucination failures | showing the first 30 correct, 30 incorrect, and 20 hallucination examples
Correct (30)
#1 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 133, 120
#2 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 120, 133
#3 OK CORRECT | Actual: 179 | Predicted: 179 ""
Top-3: 179, 174, 198
#4 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 120, 133
#5 OK CORRECT | Actual: 110 | Predicted: 110 ""
Top-3: 110, 121, 122
#6 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 120, 199
#7 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 199, 133
#8 OK CORRECT | Actual: 121 | Predicted: 121 ""
Top-3: 121, 122, 110
#9 OK CORRECT | Actual: 110 | Predicted: 110 ""
Top-3: 110, 121, 122
#10 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 120, 129
#11 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 133, 139
#12 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 133, 135
#13 OK CORRECT | Actual: 110 | Predicted: 110 ""
Top-3: 110, 121, 122
#14 OK CORRECT | Actual: 166 | Predicted: 166 ""
Top-3: 166, 174, 169
#15 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 133, 139
#16 OK CORRECT | Actual: 171 | Predicted: 171 ""
Top-3: 171, 173, 166
#17 OK CORRECT | Actual: 190 | Predicted: 190 ""
Top-3: 190, 198, 199
#18 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 139, 133
#19 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 133, 120
#20 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 133, 139
#21 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 133, 139
#22 OK CORRECT | Actual: 110 | Predicted: 110 ""
Top-3: 110, 121, 122
#23 OK CORRECT | Actual: 171 | Predicted: 171 ""
Top-3: 171, 174, 173
#24 OK CORRECT | Actual: 110 | Predicted: 110 ""
Top-3: 110, 121, 122
#25 OK CORRECT | Actual: 121 | Predicted: 121 ""
Top-3: 121, 122, 110
#26 OK CORRECT | Actual: 166 | Predicted: 166 ""
Top-3: 166, 164, 169
#27 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 120, 133
#28 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 133, 139
#29 OK CORRECT | Actual: 110 | Predicted: 110 ""
Top-3: 110, 121, 122
#30 OK CORRECT | Actual: 130 | Predicted: 130 ""
Top-3: 130, 133, 134
Incorrect (30)
#1 MISS INCORRECT | Actual: 141 | Predicted: 166 ""
Top-3: 166, 164, 169
#2 MISS INCORRECT | Actual: 121 | Predicted: 110 ""
Top-3: 110, 121, 122
#3 MISS INCORRECT | Actual: 179 | Predicted: 190 ""
Top-3: 190, 198, 199
#4 MISS INCORRECT | Actual: 130 | Predicted: 110 ""
Top-3: 110, 121, 113
#5 MISS INCORRECT | Actual: 112 | Predicted: 110 ""
Top-3: 110, 113, 121
#6 MISS INCORRECT | Actual: 113 | Predicted: 110 ""
Top-3: 110, 121, 122
#7 MISS INCORRECT | Actual: 171 | Predicted: 166 ""
Top-3: 166, 164, 174
#8 MISS INCORRECT | Actual: 111 | Predicted: 110 ""
Top-3: 110
#9 MISS INCORRECT | Actual: 121 | Predicted: 110 ""
Top-3: 110, 121, 122
#10 MISS INCORRECT | Actual: 133 | Predicted: 110 ""
Top-3: 110, 121, 122
#11 MISS INCORRECT | Actual: 121 | Predicted: 110 ""
Top-3: 110, 121, 122
#12 MISS INCORRECT | Actual: 113 | Predicted: 110 ""
Top-3: 110, 121, 122
#13 MISS INCORRECT | Actual: 121 | Predicted: 110 ""
Top-3: 110, 121, 122
#14 MISS INCORRECT | Actual: 184 | Predicted: 174 ""
Top-3: 174, 184, 179
#15 MISS INCORRECT | Actual: 132 | Predicted: 110 ""
Top-3: 110, 121, 119
#16 MISS INCORRECT | Actual: 191 | Predicted: 166 ""
Top-3: 166, 174, 164
#17 MISS INCORRECT | Actual: 121 | Predicted: 110 ""
Top-3: 110, 121, 123
#18 MISS INCORRECT | Actual: 121 | Predicted: 110 ""
Top-3: 110, 121, 122
#19 MISS INCORRECT | Actual: 137 | Predicted: 120 ""
Top-3: 120, 130, 174
#20 MISS INCORRECT | Actual: 113 | Predicted: 110 ""
Top-3: 110, 121, 122
#21 MISS INCORRECT | Actual: 121 | Predicted: 110 ""
Top-3: 110, 121, 122
#22 MISS INCORRECT | Actual: 121 | Predicted: 110 ""
Top-3: 110, 121, 122
#23 MISS INCORRECT | Actual: 137 | Predicted: 110 ""
Top-3: 110, 121, 122
#24 MISS INCORRECT | Actual: 160 | Predicted: 110 ""
Top-3: 110, 121, 123
#25 MISS INCORRECT | Actual: 135 | Predicted: 130 ""
Top-3: 130, 120, 133
#26 MISS INCORRECT | Actual: 131 | Predicted: 110 ""
Top-3: 110, 121, 111
#27 MISS INCORRECT | Actual: 137 | Predicted: 110 ""
Top-3: 110, 121, 122
#28 MISS INCORRECT | Actual: 172 | Predicted: 175 ""
Top-3: 175, 174, 198
#29 MISS INCORRECT | Actual: 131 | Predicted: 110 ""
Top-3: 110, 121, 122
#30 MISS INCORRECT | Actual: 174 | Predicted: 179 ""
Top-3: 179, 198, 174
Hallucination Failures (20)
#1 HALLUCINATION | Actual: 171 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#2 HALLUCINATION | Actual: 174 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#3 HALLUCINATION | Actual: 172 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#4 HALLUCINATION | Actual: 100 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#5 HALLUCINATION | Actual: 130 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#6 HALLUCINATION | Actual: 121 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#7 HALLUCINATION | Actual: 173 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#8 HALLUCINATION | Actual: 183 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#9 HALLUCINATION | Actual: 198 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#10 HALLUCINATION | Actual: 171 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#11 HALLUCINATION | Actual: 147 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#12 HALLUCINATION | Actual: 172 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#13 HALLUCINATION | Actual: 135 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#14 HALLUCINATION | Actual: 130 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "080"
#15 HALLUCINATION | Actual: 166 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#16 HALLUCINATION | Actual: 106 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#17 HALLUCINATION | Actual: 110 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#18 HALLUCINATION | Actual: 130 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#19 HALLUCINATION | Actual: 121 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#20 HALLUCINATION | Actual: 130 | Model output: NONE ""
Failure type: hallucination_off_taxonomy | Raw output: "NONE"