feat: 对话补全训练数据生成

This commit is contained in:
suojiayi 2025-05-13 06:39:28 +00:00
parent ee5f4585c9
commit 89f10d841d

View File

@@ -152,12 +152,12 @@ def merge_jsonl_files(file1, file2, output_file):
record = json.loads(line.strip())
index = record.get('data_idx')
cluster_center = record.get('cluster_center')
#embedding = record.get('embedding')
embedding = record.get('embedding')
# 如果'index'存在于第一个文件的'uid'中,则合并数据
if index in data_dict:
data_dict[index]['cluster_center'] = cluster_center
#data_dict[index]['embedding'] = embedding
data_dict[index]['embedding'] = embedding
# 将合并后的数据写入输出文件
with open(output_file, 'w', encoding='utf-8') as out_f:
@@ -195,12 +195,9 @@ def merge_jsonl_files(file1, file2, output_file):
record = json.loads(line.strip())
index = record.get('uid')
score = record.get('answer')
embedding = record.get('embedding')
# 如果'index'存在于第一个文件的'uid'中,则合并数据
if index in data_dict:
data_dict[index]['score'] = score
data_dict[index]['embedding'] = embedding
# 将合并后的数据写入输出文件
with open(output_file, 'w', encoding='utf-8') as out_f: