mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
77 lines
3.2 KiB
Python
77 lines
3.2 KiB
Python
from typing import List, Tuple
|
|
from modules.alignment import read_cilin, read_confusion, Alignment
|
|
from modules.merger import Merger
|
|
from modules.classifier import Classifier
|
|
|
|
class Annotator:
    """Turn a source/target sentence pair into a list of annotation lines.

    An instance bundles three callables -- an aligner, an edit merger and an
    edit classifier -- and, when called, renders the sentence pair as text
    lines: one ``S`` line for the source, one ``T`` line per retained
    alignment and one ``A`` line per edit, terminated by a blank line.
    """

    # Target texts with a reserved meaning. They are compared against the
    # concatenated target tokens and echoed verbatim on the ``T`` line.
    _NO_ERROR = "没有错误"
    _NOT_ANNOTATABLE = "无法标注"

    def __init__(self,
                 align: Alignment,
                 merger: Merger,
                 classifier: Classifier,
                 granularity: str = "word",
                 strategy: str = "first"):
        """Store the pipeline components and their configuration.

        Args:
            align: Called as ``align(src, tgt)``; must return a sliceable
                sequence of alignment objects.
            merger: Called as ``merger(align_obj, src, tgt, verbose)``;
                returns the edits for one alignment.
            classifier: Called as ``classifier(src, tgt, edits, verbose)``;
                returns an iterable of objects exposing ``op``, ``toks``
                and ``inds``.
            granularity: Stored on the instance; not read by this class.
            strategy: ``"first"`` keeps only the first alignment; any other
                value keeps every alignment returned by ``align``.
        """
        self.align = align
        self.merger = merger
        self.classifier = classifier
        self.granularity = granularity
        self.strategy = strategy

    @classmethod
    def create_default(cls, granularity: str = "word", strategy: str = "first"):
        """Build an annotator with the default parameters used in the paper.

        Args:
            granularity: Forwarded to ``Alignment``, ``Merger`` and
                ``Classifier`` and stored on the new instance.
            strategy: Stored on the new instance (see ``__init__``).

        Returns:
            A new instance of ``cls``.
        """
        # read_cilin() returns a pair; only the first element is needed here.
        semantic_dict, _ = read_cilin()
        confusion_dict = read_confusion()
        align = Alignment(semantic_dict, confusion_dict, granularity)
        merger = Merger(granularity)
        classifier = Classifier(granularity)
        return cls(align, merger, classifier, granularity, strategy)

    @staticmethod
    def _edit_line(op, toks, inds, annotator_id) -> str:
        """Format one ``A`` line from an edit's operation, tokens and span."""
        return f"A {inds[0]} {inds[1]}|||{op}|||{toks}|||REQUIRED|||-NONE-|||{annotator_id}\n"

    def __call__(self,
                 src: List[Tuple],
                 tgt: List[Tuple],
                 annotator_id: int = 0,
                 verbose: bool = False):
        """Align sentences and annotate them with error type information.

        Args:
            src: Source sentence as a list of tuples; element 0 of each tuple
                is the token text.
            tgt: Target sentence in the same layout as ``src``.
            annotator_id: Identifier written into every ``T`` and ``A`` line.
            verbose: Forwarded unchanged to the merger and the classifier.

        Returns:
            A pair ``(annotations_out, cors)``. ``annotations_out`` is the
            list of output lines, always ending with ``"\\n"``. ``cors`` is
            ``[tgt_str]`` in the two special cases; otherwise it is the
            classifier result for the last distinct alignment, or ``[]`` when
            the aligner produced no alignment at all.
        """
        src_tokens = [x[0] for x in src]
        tgt_tokens = [x[0] for x in tgt]
        src_str = "".join(src_tokens)
        tgt_str = "".join(tgt_tokens)

        # convert to text form
        annotations_out = ["S " + " ".join(src_tokens) + "\n"]

        if tgt_str == self._NO_ERROR or src_str == tgt_str:  # Error Free Case
            annotations_out.append(f"T{annotator_id} {self._NO_ERROR}\n")
            cors = [tgt_str]
            annotations_out.append(
                self._edit_line("noop", "-NONE-", (-1, -1), annotator_id))
        elif tgt_str == self._NOT_ANNOTATABLE:  # Not Annotatable Case
            annotations_out.append(f"T{annotator_id} {self._NOT_ANNOTATABLE}\n")
            cors = [tgt_str]
            annotations_out.append(
                self._edit_line("NA", "-NONE-", (-1, -1), annotator_id))
        else:  # Other
            # Bind ``cors`` up front: without this, an empty alignment list
            # would leave it undefined and the final ``return`` would raise
            # UnboundLocalError.
            cors = []
            align_objs = self.align(src, tgt)
            if self.strategy == "first":
                align_objs = align_objs[:1]

            edit_objs = []  # distinct edit sets already emitted
            for align_obj in align_objs:
                edits = self.merger(align_obj, src, tgt, verbose)
                if edits in edit_objs:
                    # Another alignment already produced the same edits.
                    continue
                # The running count of distinct edit sets is the index
                # written into the ``T`` line.
                align_idx = len(edit_objs)
                edit_objs.append(edits)
                annotations_out.append(
                    f"T{annotator_id}-A{align_idx} " + " ".join(tgt_tokens) + "\n")
                cors = self.classifier(src, tgt, edits, verbose)
                for cor in cors:
                    annotations_out.append(
                        self._edit_line(cor.op, cor.toks, cor.inds, annotator_id))

        annotations_out.append("\n")
        return annotations_out, cors
|