"""Generate synthetic recruitment question/rewrite pairs and save them to Excel.

Originally added as ``智数员工/zhaopin_zaoshu.py``.

Pipeline:
1. Build random request payloads whose question is either one of the fixed
   questions in ``Config`` or one generated by the chat model.
2. Ask the chat model to rewrite each question into a clear, complete query.
3. Write every input/output pair to an Excel workbook.
"""
import asyncio
import json
import logging
import random
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, List, Tuple

import pandas as pd
import requests
from openpyxl import Workbook

logging.basicConfig(level=logging.INFO)


# Configuration parameters
class Config:
    """Static settings and the value pools used to randomise samples."""

    OUTPUT_FILE = "recruitment_data.xlsx"

    # Chat-completions endpoint settings.
    API_URL = "http://100.105.1.227:8000/v1/chat/completions"
    # NOTE(review): credential is hard-coded in source; consider loading it
    # from the environment or a secrets store instead.
    API_KEY = "7c3eafb5-2d6e-100d-ab0f-7b2c1cdafb3c"
    MODEL = "Qwen3-72B"
    REQUEST_TIMEOUT = 180  # seconds, passed to requests.post

    FIXED_QUESTIONS = [
        "上个月面试了多少人",
        "本周安排了几个面试",
        "招聘进度如何",
        "有多少候选人进入二面",
        "销售岗位的招聘情况",
        "技术岗位的简历筛选数量",
        "最近一周的offer发放数量",
        "哪个部门的招聘完成率最高",
        "招聘成本是否超出预算",
        "候选人平均面试周期是多长",
    ]
    # NOTE(review): the empty string presumably simulates a missing
    # location -- confirm it is intentional.
    LOCATIONS = ["北京", "上海", "广州", "深圳", "杭州", "", "成都"]
    INTENTS = ["招聘数据", "招聘进度", "其他", "成本分析", "效率统计"]
    COMMISSIONER_TYPES = ["yxz", "hrbp", "recruiter", "manager"]
    USER_NAMES = ["张招聘", "李HR", "王人事", "赵经理", "刘专员"]


def _post_chat(input_content: str) -> str:
    """Send one blocking chat-completions request and return the reply text.

    Raises:
        RuntimeError: if the status code is not 200 or the response body
            does not have the expected ``choices[0].message.content`` shape.
        requests.RequestException: on transport failures such as timeouts.
    """
    response = requests.post(
        Config.API_URL,
        headers={
            "Content-Type": "application/json",
            "Authorization": Config.API_KEY,
        },
        json={
            "model": Config.MODEL,
            "stream": False,
            "temperature": 0.6,
            # NOTE(review): OpenAI-compatible servers usually spell these
            # top_p / top_k / min_p -- confirm the server accepts this casing.
            "TopP": 0.95,
            "TopK": 20,
            "MinP": 0,
            "messages": [{"role": "user", "content": input_content}],
        },
        timeout=Config.REQUEST_TIMEOUT,
    )

    if response.status_code != 200:
        raise RuntimeError(
            f"API request failed with status code: {response.status_code}"
        )
    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        raise RuntimeError(f"Error processing API response: {e}") from e


async def chat(input_content):
    """Ask the chat model a single question and return its text reply.

    The blocking HTTP call runs in the event loop's default executor so that
    many ``chat`` coroutines can be in flight at once.

    Args:
        input_content: Prompt sent as the single user message.

    Returns:
        The content of the first choice in the model's response.

    Raises:
        RuntimeError: on a non-200 status or a malformed response body.
        requests.RequestException: on transport failures.
    """
    loop = asyncio.get_running_loop()
    try:
        result = await loop.run_in_executor(None, _post_chat, input_content)
    except (RuntimeError, requests.RequestException) as e:
        logging.error("%s", e)
        raise
    # Small pause between calls, kept from the original implementation.
    await asyncio.sleep(0.1)
    return result


# Use the model to generate a diverse question
async def generate_diverse_questions() -> str:
    """Have the model invent one recruitment-analytics question.

    Returns:
        The generated question text.

    Raises:
        RuntimeError, requests.RequestException: propagated from ``chat``.
    """
    input_content = """你是一个资深HR分析师。请生成一个招聘数据分析的查询请求,要求:
- 聚焦在以下至少一个方面:面试、offer、入职、渠道效果、成本、周期时间
- 包含具体的时间范围(如最近一周/上月/本季度)
- 可选项包含部门/岗位/地域等维度
- 直接返回问题,不要任何解释

例如:
对比北京和上海地区过去两个月销售岗位的offer接受率"""
    # ``chat`` is a coroutine function: it must be awaited to get the text.
    gen_question = await chat(input_content)
    await asyncio.sleep(0.1)

    return gen_question


# Generate recruitment-related input data
async def generate_input_data(use_fixed: bool = True) -> Dict[str, Any]:
    """Build one randomised request payload.

    Roughly 70% of payloads use a fixed question from ``Config``; the rest
    use a model-generated one. If generation fails or returns nothing, a
    fixed question is used so one bad call cannot abort a whole batch.

    Args:
        use_fixed: Kept for backward compatibility; currently ignored.

    Returns:
        A dict with the question under ``messages[0]["content"]`` plus
        random location, uuid, intent, user and commissioner fields.
    """
    base_question = None
    if random.random() <= 0.3:
        try:
            base_question = await generate_diverse_questions()
        except (RuntimeError, requests.RequestException) as e:
            logging.error("Question generation failed, using a fixed question: %s", e)
    if not base_question:
        base_question = random.choice(Config.FIXED_QUESTIONS)

    return {
        "messages": [{
            "role": "user",
            "content": base_question,
        }],
        "location": random.choice(Config.LOCATIONS),
        # Integer bounds: randint rejects float arguments on Python 3.12+,
        # and 1e19 - 1 is not exactly representable as a float anyway.
        "uuid": str(random.randint(10**18, 10**19 - 1)),
        "intent": random.choice(Config.INTENTS),
        "loginUserName": random.choice(Config.USER_NAMES),
        "loginUserId": "hr_" + str(random.randint(1000, 9999)),
        "commissioner_type": random.choice(Config.COMMISSIONER_TYPES),
    }


# Process a single request
async def process_request(input_data: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Ask the model to rewrite the question carried by ``input_data``.

    Args:
        input_data: Payload as produced by ``generate_input_data``.

    Returns:
        ``(input_data, output_data)``. ``output_data`` has ``code`` "0" and
        the rewritten question in ``result`` on success, or ``code`` "1" and
        the error text in ``message`` on any failure. Never raises.
    """
    try:
        user_content = input_data["messages"][0]["content"]
        # Only the question text goes into the prompt, not the whole payload.
        input_content = f"""
你是一个专业招聘数据分析助手。请按以下规则处理问题:
1. 如果问题已包含明确且可清晰回答,直接返回原问题
2. 如果问题模糊或不完整,按标准改写:
   - 补充时间范围(最近/上月/本季度等)
   - 明确量化指标(数量/比率/趋势等)
   - 指定具体对象(岗位/部门/渠道等)
3. 直接返回最终问题,不要任何解释

待处理问题:{user_content}
"""
        rewritten_question = await chat(input_content)

        output_data = {
            "code": "0",
            "message": "",
            "result": rewritten_question,
        }
        return input_data, output_data
    except Exception as e:
        # Per-sample boundary: record the failure instead of aborting the batch.
        output_data = {
            "code": "1",
            "message": str(e),
            "result": "",
        }
        return input_data, output_data


# Save data to Excel
def save_to_excel(data: List[Dict[str, Any]], filename: str):
    """Flatten input/output pairs into rows and write them to ``filename``.

    Args:
        data: Items shaped like ``{"input": ..., "output": ...}`` as returned
            by ``generate_data``.
        filename: Destination ``.xlsx`` path.
    """
    rows = []
    for item in data:
        input_data = item["input"]
        output_data = item["output"]

        rows.append({
            "输入问题": input_data["messages"][0]["content"],
            "输出问题": output_data["result"],
            "地点": input_data["location"],
            "UUID": input_data["uuid"],
            "意图": input_data["intent"],
            "用户名": input_data["loginUserName"],
            "用户ID": input_data["loginUserId"],
            "专员类型": input_data["commissioner_type"],
            "状态码": output_data["code"],
            "消息": output_data["message"],
        })

    df = pd.DataFrame(rows)
    df.to_excel(filename, index=False, engine='openpyxl')
    print(f"数据已保存到 {filename}")


# Generate data concurrently
async def generate_data(num_samples: int) -> List[Dict[str, Any]]:
    """Create ``num_samples`` payloads and rewrite each one concurrently.

    Args:
        num_samples: Number of input/output pairs to produce.

    Returns:
        A list of ``{"input": ..., "output": ...}`` dicts in input order.
    """
    # First generate all input payloads
    input_tasks = [generate_input_data() for _ in range(num_samples)]
    input_data_list = await asyncio.gather(*input_tasks)

    # Then process all requests concurrently
    process_tasks = [process_request(input_data) for input_data in input_data_list]
    results = await asyncio.gather(*process_tasks)

    # Combine the results
    return [
        {"input": input_data, "output": output_data}
        for input_data, output_data in results
    ]


# Main entry point
async def main(num_samples: int = 2000):
    """Generate the data set, save it to Excel and print the first samples.

    Args:
        num_samples: Number of samples to generate (default 2000).
    """
    try:
        print(f"开始生成 {num_samples} 条招聘数据...")
        data_pairs = await generate_data(num_samples)

        save_to_excel(data_pairs, Config.OUTPUT_FILE)

        # Print the first 3 samples
        print("\n样本示例:")
        for i, pair in enumerate(data_pairs[:3], 1):
            print(f"样本 {i}:")
            print("输入问题:", pair["input"]["messages"][0]["content"])
            print("输出问题:", pair["output"]["result"])
            print("-" * 50)

    except Exception as e:
        # Top-level boundary: report the failure instead of a raw traceback.
        print(f"发生错误: {e}")


if __name__ == "__main__":
    asyncio.run(main())