"""Attach K-Means cluster centers to work-order records stored as JSON Lines.

The script reads a K-Means result file (one JSON object per line carrying
``data_idx`` and ``cluster_center``), then reads the work-order file (one JSON
object per line carrying ``工单编号``), adds ``cluster_center`` to every
work-order record whose id appears in the K-Means file, and writes the
resulting records to ``merged_result.jsonl``.
"""
import json

KMEANS_RESULT_PATH = '/data/zhaochsh01/buquan/12345/kmeans/123451wfilter_cluster_kmeans_result.jsonl'
INSTAG_PATH = '/data/zhaochsh01/buquan/12345/instag/123451wfilter_instag.jsonl'
OUTPUT_PATH = 'merged_result.jsonl'

# Field of a work-order record that holds the id matched against ``data_idx``.
ID_FIELD = "工单编号"


def iter_json_objects(path, error_prefix):
    """Yield every JSON object found in the JSON Lines file at *path*.

    Blank lines are skipped.  Lines that are not valid JSON, or that decode to
    something other than a JSON object, are reported on stdout (prefixed with
    *error_prefix*) and skipped, so one bad line never aborts the run.

    Args:
        path: Path of the UTF-8 encoded JSON Lines file to read.
        error_prefix: Text placed at the start of each reported error.

    Yields:
        One ``dict`` per well-formed line, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"{error_prefix}: {line}, error: {e}")
                continue
            if not isinstance(record, dict):
                # Valid JSON but not an object (e.g. a list): it has no fields
                # to read, so report it instead of failing on ``.get``.
                print(f"{error_prefix}: {line}, error: expected a JSON object")
                continue
            yield record


def load_cluster_centers(path) -> dict:
    """Build a ``{str(data_idx): cluster_center}`` mapping from a K-Means file.

    Ids are normalized to ``str`` because the work-order side is looked up by
    ``str(id)``; without this, numeric ``data_idx`` values would never match.
    Entries with a missing/empty ``data_idx`` or a missing ``cluster_center``
    are skipped.  An id of ``0`` is valid and kept.  If the same id occurs
    more than once, the last occurrence wins.

    Args:
        path: Path of the K-Means result file (JSON Lines).

    Returns:
        Mapping from the string form of ``data_idx`` to ``cluster_center``.
    """
    centers = {}
    for entry in iter_json_objects(path, "Error parsing K-Means line"):
        data_idx = entry.get("data_idx")
        cluster_center = entry.get("cluster_center")
        if data_idx is None or data_idx == "" or cluster_center is None:
            continue
        centers[str(data_idx)] = cluster_center
    return centers


def merge_cluster_centers(path, cluster_by_id, keep_unmatched=False) -> list:
    """Add ``cluster_center`` to the work-order records that have one.

    NOTE(review): by default only matched records are returned; this assumes
    the original script dropped unmatched records -- confirm, and pass
    ``keep_unmatched=True`` if they must be kept (without ``cluster_center``).

    Args:
        path: Path of the work-order file (JSON Lines).
        cluster_by_id: Mapping produced by :func:`load_cluster_centers`.
        keep_unmatched: When true, records without a matching id are also
            returned, unchanged.

    Returns:
        The selected records serialized as JSON strings (non-ASCII characters
        kept as-is), in file order.
    """
    merged = []
    for record in iter_json_objects(path, "Error processing JSONL line"):
        record_id = record.get(ID_FIELD)
        # A missing id must not be turned into the string "None", which could
        # accidentally match a K-Means entry.
        key = None if record_id is None else str(record_id)
        if key is not None and key in cluster_by_id:
            record["cluster_center"] = cluster_by_id[key]
        elif not keep_unmatched:
            continue
        merged.append(json.dumps(record, ensure_ascii=False))
    return merged


def write_jsonl(path, lines):
    """Write *lines* to *path* as UTF-8 text, one item per line."""
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(f"{line}\n" for line in lines)


def main():
    """Run the merge with the configured input and output paths."""
    cluster_by_id = load_cluster_centers(KMEANS_RESULT_PATH)
    merged = merge_cluster_centers(INSTAG_PATH, cluster_by_id)
    write_jsonl(OUTPUT_PATH, merged)
    print(f"数据处理完成,结果已保存到 {OUTPUT_PATH}")


if __name__ == "__main__":
    main()