For the example above, a model should identify all ten editorial corrections and produce output like the following:
{
  "image": "001.png",
  "page_number": 5,
  "source": "Little Dorrit",
  "annotator": "pairsys",
  "annotation_date": "2025-04-04",
  "verified": true,
  "edits": [
    {
      "type": "punctuation",
      "original_text": "church bells",
      "corrected_text": "church bells,",
      "line_number": 2,
      "page": "001.png"
    },
    {
      "type": "punctuation",
      "original_text": "wine bottles",
      "corrected_text": "wine-bottles",
      "line_number": 11,
      "page": "001.png"
    },
    {
      "type": "punctuation",
      "original_text": "got through",
      "corrected_text": "got, through",
      "line_number": 14,
      "page": "001.png"
    },
    {
      "type": "punctuation",
      "original_text": "iron bars fashioned",
      "corrected_text": "iron bars, fashioned",
      "line_number": 14,
      "page": "001.png"
    },
    {
      "type": "punctuation",
      "original_text": "grating where",
      "corrected_text": "grating, where",
      "line_number": 17,
      "page": "001.png"
    },
    {
      "type": "punctuation",
      "original_text": "outside and",
      "corrected_text": "outside; and",
      "line_number": 29,
      "page": "001.png"
    },
    {
      "type": "punctuation",
      "original_text": "intact in",
      "corrected_text": "intact, in",
      "line_number": 30,
      "page": "001.png"
    },
    {
      "type": "capitalization",
      "original_text": "indian ocean",
      "corrected_text": "Indian Ocean",
      "line_number": 31,
      "page": "001.png"
    },
    {
      "type": "punctuation",
      "original_text": "was waiting to be fed looking",
      "corrected_text": "was waiting to be fed; looking",
      "line_number": 36,
      "page": "001.png"
    },
    {
      "type": "punctuation",
      "original_text": "bars that",
      "corrected_text": "bars, that",
      "line_number": 36,
      "page": "001.png"
    }
  ]
}