feat: 对话补全训练数据生成
This commit is contained in:
parent
ee5f4585c9
commit
89f10d841d
@@ -152,12 +152,12 @@ def merge_jsonl_files(file1, file2, output_file):
|
|||||||
record = json.loads(line.strip())
|
record = json.loads(line.strip())
|
||||||
index = record.get('data_idx')
|
index = record.get('data_idx')
|
||||||
cluster_center = record.get('cluster_center')
|
cluster_center = record.get('cluster_center')
|
||||||
#embedding = record.get('embedding')
|
embedding = record.get('embedding')
|
||||||
|
|
||||||
# 如果'index'存在于第一个文件的'uid'中,则合并数据
|
# 如果'index'存在于第一个文件的'uid'中,则合并数据
|
||||||
if index in data_dict:
|
if index in data_dict:
|
||||||
data_dict[index]['cluster_center'] = cluster_center
|
data_dict[index]['cluster_center'] = cluster_center
|
||||||
#data_dict[index]['embedding'] = embedding
|
data_dict[index]['embedding'] = embedding
|
||||||
|
|
||||||
# 将合并后的数据写入输出文件
|
# 将合并后的数据写入输出文件
|
||||||
with open(output_file, 'w', encoding='utf-8') as out_f:
|
with open(output_file, 'w', encoding='utf-8') as out_f:
|
||||||
@@ -195,12 +195,9 @@ def merge_jsonl_files(file1, file2, output_file):
|
|||||||
record = json.loads(line.strip())
|
record = json.loads(line.strip())
|
||||||
index = record.get('uid')
|
index = record.get('uid')
|
||||||
score = record.get('answer')
|
score = record.get('answer')
|
||||||
embedding = record.get('embedding')
|
|
||||||
|
|
||||||
# 如果'index'存在于第一个文件的'uid'中,则合并数据
|
# 如果'index'存在于第一个文件的'uid'中,则合并数据
|
||||||
if index in data_dict:
|
if index in data_dict:
|
||||||
data_dict[index]['score'] = score
|
data_dict[index]['score'] = score
|
||||||
data_dict[index]['embedding'] = embedding
|
|
||||||
|
|
||||||
# 将合并后的数据写入输出文件
|
# 将合并后的数据写入输出文件
|
||||||
with open(output_file, 'w', encoding='utf-8') as out_f:
|
with open(output_file, 'w', encoding='utf-8') as out_f:
|
||||||
|
Loading…
Reference in New Issue
Block a user