Example Browser

Sample narratives: correct predictions, incorrect predictions, and hallucination failures (showing the first 30, 30, and 20 examples, respectively)

Correct 30

#1OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 133, 120
#2OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 120, 133
#3OK CORRECTActual: 179 | Predicted: 179

""

Top-3: 179, 174, 198
#4OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 120, 133
#5OK CORRECTActual: 110 | Predicted: 110

""

Top-3: 110, 121, 122
#6OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 120, 199
#7OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 199, 133
#8OK CORRECTActual: 121 | Predicted: 121

""

Top-3: 121, 122, 110
#9OK CORRECTActual: 110 | Predicted: 110

""

Top-3: 110, 121, 122
#10OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 120, 129
#11OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 133, 139
#12OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 133, 135
#13OK CORRECTActual: 110 | Predicted: 110

""

Top-3: 110, 121, 122
#14OK CORRECTActual: 166 | Predicted: 166

""

Top-3: 166, 174, 169
#15OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 133, 139
#16OK CORRECTActual: 171 | Predicted: 171

""

Top-3: 171, 173, 166
#17OK CORRECTActual: 190 | Predicted: 190

""

Top-3: 190, 198, 199
#18OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 139, 133
#19OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 133, 120
#20OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 133, 139
#21OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 133, 139
#22OK CORRECTActual: 110 | Predicted: 110

""

Top-3: 110, 121, 122
#23OK CORRECTActual: 171 | Predicted: 171

""

Top-3: 171, 174, 173
#24OK CORRECTActual: 110 | Predicted: 110

""

Top-3: 110, 121, 122
#25OK CORRECTActual: 121 | Predicted: 121

""

Top-3: 121, 122, 110
#26OK CORRECTActual: 166 | Predicted: 166

""

Top-3: 166, 164, 169
#27OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 120, 133
#28OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 133, 139
#29OK CORRECTActual: 110 | Predicted: 110

""

Top-3: 110, 121, 122
#30OK CORRECTActual: 130 | Predicted: 130

""

Top-3: 130, 133, 134

Incorrect 30

#1MISS INCORRECTActual: 141 | Predicted: 166

""

Top-3: 166, 164, 169
#2MISS INCORRECTActual: 121 | Predicted: 110

""

Top-3: 110, 121, 122
#3MISS INCORRECTActual: 179 | Predicted: 190

""

Top-3: 190, 198, 199
#4MISS INCORRECTActual: 130 | Predicted: 110

""

Top-3: 110, 121, 113
#5MISS INCORRECTActual: 112 | Predicted: 110

""

Top-3: 110, 113, 121
#6MISS INCORRECTActual: 113 | Predicted: 110

""

Top-3: 110, 121, 122
#7MISS INCORRECTActual: 171 | Predicted: 166

""

Top-3: 166, 164, 174
#8MISS INCORRECTActual: 111 | Predicted: 110

""

Top-3: 110
#9MISS INCORRECTActual: 121 | Predicted: 110

""

Top-3: 110, 121, 122
#10MISS INCORRECTActual: 133 | Predicted: 110

""

Top-3: 110, 121, 122
#11MISS INCORRECTActual: 121 | Predicted: 110

""

Top-3: 110, 121, 122
#12MISS INCORRECTActual: 113 | Predicted: 110

""

Top-3: 110, 121, 122
#13MISS INCORRECTActual: 121 | Predicted: 110

""

Top-3: 110, 121, 122
#14MISS INCORRECTActual: 184 | Predicted: 174

""

Top-3: 174, 184, 179
#15MISS INCORRECTActual: 132 | Predicted: 110

""

Top-3: 110, 121, 119
#16MISS INCORRECTActual: 191 | Predicted: 166

""

Top-3: 166, 174, 164
#17MISS INCORRECTActual: 121 | Predicted: 110

""

Top-3: 110, 121, 123
#18MISS INCORRECTActual: 121 | Predicted: 110

""

Top-3: 110, 121, 122
#19MISS INCORRECTActual: 137 | Predicted: 120

""

Top-3: 120, 130, 174
#20MISS INCORRECTActual: 113 | Predicted: 110

""

Top-3: 110, 121, 122
#21MISS INCORRECTActual: 121 | Predicted: 110

""

Top-3: 110, 121, 122
#22MISS INCORRECTActual: 121 | Predicted: 110

""

Top-3: 110, 121, 122
#23MISS INCORRECTActual: 137 | Predicted: 110

""

Top-3: 110, 121, 122
#24MISS INCORRECTActual: 160 | Predicted: 110

""

Top-3: 110, 121, 123
#25MISS INCORRECTActual: 135 | Predicted: 130

""

Top-3: 130, 120, 133
#26MISS INCORRECTActual: 131 | Predicted: 110

""

Top-3: 110, 121, 111
#27MISS INCORRECTActual: 137 | Predicted: 110

""

Top-3: 110, 121, 122
#28MISS INCORRECTActual: 172 | Predicted: 175

""

Top-3: 175, 174, 198
#29MISS INCORRECTActual: 131 | Predicted: 110

""

Top-3: 110, 121, 122
#30MISS INCORRECTActual: 174 | Predicted: 179

""

Top-3: 179, 198, 174

Hallucination Failures 20

#1HALLUCINATIONActual: 171 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#2HALLUCINATIONActual: 174 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#3HALLUCINATIONActual: 172 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#4HALLUCINATIONActual: 100 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#5HALLUCINATIONActual: 130 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#6HALLUCINATIONActual: 121 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#7HALLUCINATIONActual: 173 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#8HALLUCINATIONActual: 183 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#9HALLUCINATIONActual: 198 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#10HALLUCINATIONActual: 171 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#11HALLUCINATIONActual: 147 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#12HALLUCINATIONActual: 172 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#13HALLUCINATIONActual: 135 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#14HALLUCINATIONActual: 130 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "080"
#15HALLUCINATIONActual: 166 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#16HALLUCINATIONActual: 106 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#17HALLUCINATIONActual: 110 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#18HALLUCINATIONActual: 130 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#19HALLUCINATIONActual: 121 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"
#20HALLUCINATIONActual: 130 | Model output: NONE

""

Failure type: hallucination_off_taxonomy | Raw output: "NONE"