import time
import json
import os

import requests
import chromadb
import dedata
from openai import OpenAI
from FlagEmbedding import FlagModel
from sentence_transformers import CrossEncoder

from fallback import fallback
from dianceng import dianceng
from lingji import lingji
from jieheceng import jieheceng
from celery_app import celery_app
from postprocess import postprocess
from extra import extra, need_extra
from huansuan import callzaihuansuan

# ChromaDB collections used for candidate retrieval.
client = chromadb.HttpClient(host='47.101.198.30', port=8000)
collection = client.get_or_create_collection(name="tj_de_bge")
cuoshi_collection = client.get_or_create_collection(name="tj_cuoshi_bge")
menchuang_collection = client.get_or_create_collection(name="tj_menchuang_bge")

# BGE bi-encoders for retrieval and cross-encoders for reranking.
model = FlagModel('/mnt/d/Develop/bge/test2_encoder_only_base_bge-large-zh-v1.5')
cuoshi_model = FlagModel('cuoshi_encoder_only_base_bge-large-zh-v1.5/cuoshi_encoder_only_base_bge-large-zh-v1.5')

ce = CrossEncoder('/mnt/d/Develop/celery/final')
cuoshi_ce = CrossEncoder('cuoshi_reranker/final')
#ce = CrossEncoder('/Users/zxp/Downloads/reranker')

# Rule tables and lookup dictionaries (JSON files shipped alongside this module).
with open("hunningtu_rule", "r") as f:
    obj = json.loads(f.read())
with open("nantong_rule", "r") as f:
    obj2 = json.loads(f.read())
with open("basic_rule", "r") as f:
    basic = json.loads(f.read())
with open("menchuang_rule", "r") as f:
    menchuang = json.loads(f.read())
with open("incremental_rule", "r") as f:
    incremental = json.loads(f.read())
with open("label_name", "r") as f:
    label_name = json.loads(f.read())
with open("name_label", "r") as f:
    name_label = json.loads(f.read())
with open("name_dw", "r") as f:
    name_dw = json.loads(f.read())

THRESHOLD = 0.8  # reranker score cutoff; adjust it

# OpenAI-compatible clients for the different model providers.
aiclient = OpenAI(
    #api_key=os.getenv("DASHSCOPE_API_KEY"),
    api_key=os.getenv("ZAI_API_KEY"),
    #base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    base_url="https://open.bigmodel.cn/api/paas/v4/",
)
qwclient = OpenAI(
    # If the environment variable is not set, replace the next line with api_key="sk-xxx".
    #api_key=os.getenv("DASHSCOPE_API_KEY"),
    api_key=os.getenv("MS_API_KEY"),
    #base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    base_url="https://api-inference.modelscope.cn/v1/",
)
hyclient = OpenAI(
    #api_key=os.getenv("DASHSCOPE_API_KEY"),
    api_key=os.getenv("HY_API_KEY"),
    #base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    base_url="https://api.hunyuan.cloud.tencent.com/v1",
)
bdclient = OpenAI(
    #api_key=os.getenv("DASHSCOPE_API_KEY"),
    api_key=os.getenv("BD_API_KEY"),
    #base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    base_url="https://qianfan.baidubce.com/v2",
)
sfclient = OpenAI(
    #api_key=os.getenv("DASHSCOPE_API_KEY"),
    api_key=os.getenv("SF_API_KEY"),
    #base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    base_url="https://api.siliconflow.cn/v1",
)
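
# The callzai* helpers below all follow the same two-stage pattern: a first chat call
# lets a model reason about the question in free text, and a second call feeds that text
# back and asks for a terse, parseable verdict (是/否, or an expression such as A=0.1*B).
# A minimal sketch of that pattern, assuming the OpenAI-compatible clients defined above;
# the helper name _two_stage_answer is illustrative only and is not used by the pipeline.
def _two_stage_answer(reason_client, reason_model, extract_client, extract_model, question, extract_prompt):
    """Illustrative sketch: free-form reasoning call followed by a short extraction call."""
    reasoning = reason_client.chat.completions.create(
        model=reason_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ],
        extra_body={"thinking": {"type": "disabled"}},  # disable thinking, as the real helpers do
    ).choices[0].message.content
    verdict = extract_client.chat.completions.create(
        model=extract_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant.请将最终答案以JSON格式输出"},
            {"role": "user", "content": extract_prompt + reasoning},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    ).choices[0].message.content
    return verdict
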
def callzaikuailiao(data):
    """Ask an LLM whether the work item includes block/tile surfacing (块料面层)."""
    time.sleep(1)
    completion = aiclient.chat.completions.create(
        # Model list: https://help.aliyun.com/zh/model-studio/getting-started/models
        model="glm-4.5-flash",
        #model="ZhipuAI/GLM-4.5",
        #model="qwen3-4b",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": " 楼地面有多种做法,包括整体面层,块料面层,木地板等。块料面层常见的有石材块料面板,缸砖,马赛克,假麻石块,地砖,橡胶塑料板等。 现在给定一工作内容如下: " + data['label'] + " " + data['mc'] + " " + data['tz'] + ", 计量单位为" + data['dw'] + ", 请问该工作内容中包括了块料面层施工吗?"},
        ],
        # Qwen3 models control thinking via enable_thinking (open-source default True, commercial default False).
        # For open-source Qwen3 models without streaming, uncomment the next line or the call fails.
        #extra_body={"enable_thinking": False},
        extra_body={"thinking": {"type": "disabled"}},
    )
    json_string = completion.choices[0].message.content
    print(json_string)

    time.sleep(1)
    completion = sfclient.chat.completions.create(
        model="THUDM/GLM-4-9B-0414",
        #model="glm-4.5-flash",
        messages=[
            {"role": "system", "content": "You are a helpful assistant.请将最终答案以JSON格式输出"},
            {"role": "user", "content": " 给你一段文字如下, " + json_string + ",其中给出了关于是否包括块料面层施工的判断,请将该判断输出。请输出是或者否"},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    json_string = completion.choices[0].message.content
    print(json_string)
    return json_string


def callzaiclarify(data):
    """Ask an LLM whether a pipe-pile item is core filling (填芯) rather than pile driving."""
    completion = aiclient.chat.completions.create(
        model="glm-4.5-flash",
        #model="qwen3-4b",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": " 管桩的清单工作内容描述可以分成两类,一类是打桩、压桩,属于桩基工程的一种,其工作内容计量的单位一般是米(m)或根。另一类是填芯,一般是向桩芯内浇混凝土,属于土建工程的一种,其配套的计量单位一般是立方米(m3),即浇混凝土的体积量。现在给定一工作内容如下: " + data['label'] + " " + data['mc'] + " " + data['tz'] + ", 计量单位为" + data['dw'] + ", 请问该工作内容属于填芯吗?"},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    json_string = completion.choices[0].message.content
    print(json_string)

    completion = sfclient.chat.completions.create(
        model="THUDM/GLM-4-9B-0414",
        #model="glm-4.5-flash",
        messages=[
            {"role": "system", "content": "You are a helpful assistant.请将最终答案以JSON格式输出"},
            {"role": "user", "content": " 给你一段文字如下, " + json_string + ",其中给出了关于是不是填芯的判断,请将该判断输出。请输出是或者否"},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    json_string = completion.choices[0].message.content
    print(json_string)
    return json_string
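
# callzaidw() below asks for the relation between two measurement units and is expected to
# state an expression such as A=B, A=0.1*B or A<>B on its final answer line. huansuan()
# (further below) keeps only answer lines containing ':' and 'A' together with '=' or '<>',
# strips quotes, and converts the right-hand side into a numeric factor
# (A<>B -> 0, A=B -> 1, A=0.1*B -> 0.1).
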
def callzaidw(A, B):
    """Ask an LLM how measurement unit A relates to unit B (A=B, A=k*B, or A<>B)."""
    time.sleep(1)
    completion = qwclient.chat.completions.create(
        #model="glm-4.5-flash",
        model="ZhipuAI/GLM-4.5",
        #model="qwen3-4b",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": " 计量单位可以用名称或者符号表示,常用的符号包括表示米的符号m,表示千米的符号km,表示吨的符号t,表示千克的符号kg,表示平方米的符号m2,表示立方米的符号m3。也有计量单位很宽泛,比如“项”、“次”. 给定一个工作量计量单位,内容为" + A + ",记作A,再给定一个工作量计量单位,内容为" + B + ",记作B。若两个单位相等,请返回A=B。例如,“项”跟“次”是等价的,应返回A=B。若两个单位不相等,但是存在比例换算关系,请返回比例换算关系,例如A单位是m,B单位是10m, 则返回A=0.1*B。再例如,A单位是10m2,B单位是m2,则返回A=10*B。再例如,A单位是m3, B单位是1000m3,则返回A=0.001*B。若两个单位不相等,且不存在比例换算关系,请返回A<>B,例如A单位是m,B单位是m2,一个表示长度,一个表示面积,不存在比例关系,则返回A<>B。 "},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    json_string = completion.choices[0].message.content
    print(json_string)

    time.sleep(1)
    completion = qwclient.chat.completions.create(
        model="ZhipuAI/GLM-4.5",
        #model="glm-4.5-flash",
        messages=[
            {"role": "system", "content": "You are a helpful assistant.请将最终答案以JSON格式输出"},
            {"role": "user", "content": " 给你一段文字如下, " + json_string + ",其中给出了一个类似于A=B的表达式作为答案,请将该最终答案输出"},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    json_string = completion.choices[0].message.content
    print(json_string)
    return json_string


def callzai(A, B, C):
    """Ask an LLM to combine a main quota A and a companion quota B into the quantity described by bill item C."""
    completion = aiclient.chat.completions.create(
        model="glm-4.5-flash",
        #model="qwen3-4b",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": " 给定一条主定额,内容为" + A + ",记作A,再给定一条配套定额,内容为" + B + ",记作B。可以通过简单的组合,来表达对应的工作内容的数量,比如A+B可以表达,主定额的工作量加上配套定额的工作量;再比如,A+B*2可以表达, 主定额的工作量加上两倍的配套定额的工作量;再比如,A+B*(-2)可以表达, 主定额减去两倍的配套定额的工作量;再比如,A可以表示,不使用配套定额,仅表示主定额的工作量。现在给你一条工程量清单,内容为" + C + ",该条清单包含了主定额描述的工作内容,但是数量并不一定一致。请你组合A与B,表示出清单描述的对应工作数量。请输出类似A+B、A+B*2、A-B*2的格式,不要输出A+2*B、A-2*B的格式。如果清单里相应工作量的描述不明确,请输出A作为答案 "},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    json_string = completion.choices[0].message.content
    print(json_string)
    if len(json_string) < 4:
        return json_string

    completion = aiclient.chat.completions.create(
        #model="qwen3-4b",
        model="glm-4.5-flash",
        messages=[
            {"role": "system", "content": "You are a helpful assistant.请将最终答案以JSON格式输出"},
            {"role": "user", "content": " 给你一段文字如下, " + json_string + ",其中给出了一个类似于A+B的表达式作为答案,请将该最终答案输出"},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    json_string = completion.choices[0].message.content
    return json_string


def transform(answer, input, entry):
    """Substitute the main quota name (A) and the companion quota name (B) into the answer expression."""
    answers = answer.split("\n")
    answers = [x for x in answers if ':' in x and "A" in x]
    if len(answers) == 0:
        return input
    answer2 = answers[0].split(":")[1].replace(" ", "")
    answer2 = answer2.replace("A", input)
    answer2 = answer2.replace("B", "[" + entry + "]")
    answer2 = answer2.replace("\'", "")
    answer2 = answer2.replace("\"", "")
    return answer2


def zuhe(input, content):
    """Combine a selected quota with its incremental companions when the rule table defines any."""
    if input in incremental:
        option = incremental[input]
        for entry in option:
            answer = callzai(name_label[input], name_label[entry], content)
            print("answer of callzai for zuhe")
            print(answer)
            if 'B' in answer:
                return transform(answer, input, entry)
        return input
    else:
        return input
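
# Worked example for zuhe()/transform() (quota names and answer text are illustrative
# assumptions, not real rule data):
#   callzai(...) returns text whose kept line reads "最终答案: A+B*2"
#   transform("最终答案: A+B*2", "平整场地", "人工修边坡") -> "平整场地+[人工修边坡]*2"
# i.e. the line containing ':' and 'A' is kept, A is replaced by the main quota name and
# B by the bracketed companion quota name.
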
def huansuan_highlevel(bianma, label, input, dw, tz):
    """Conversion factor between the bill item's unit (input) and the quota's unit (via name_dw[dw])."""
    time.sleep(1)
    t = huansuan(input, dw)
    if t == 0:
        dw1 = input
        dw1 = dw1.lower()
        dw1 = dw1.replace("水平投影面积", "")
        dw2 = name_dw[dw]
        dw2 = dw2.lower()
        dw2 = dw2.replace("水平投影面积", "")
        answer = callzaihuansuan(bianma, label, dw1, dw2, tz, aiclient, qwclient)
        answers = answer.split("\n")
        answers = [x for x in answers if ':' in x and "A" in x]
        answers = [x for x in answers if '=' in x or '<>' in x]
        print("answer of callzaihuansuan")
        print(answers)
        if len(answers) == 0:
            return 0
        answer2 = answers[0].split(":")[1].replace(" ", "")
        if "<>" in answer2:
            return 0
        answer2 = answer2.replace("\'", "")
        answer2 = answer2.replace("\"", "")
        answer2 = answer2.split("=")[1]
        if answer2 == "B":
            return 1
        answer2 = answer2.replace("B", "")
        answer2 = answer2.replace("*", "")
        answer2 = answer2.replace("x", "")
        answer2 = answer2.replace(",", "")
        answer2 = answer2.replace(",", "")
        answer2 = answer2.replace("×", "")
        print(answer2)
        return float(answer2)
    else:
        if '人工修边坡' in label:
            return t * 0.1
        else:
            return t


def huansuan(input, dw):
    """Direct unit comparison; falls back to callzaidw() when the unit strings differ."""
    dw1 = input
    dw1 = dw1.lower()
    dw1 = dw1.replace("水平投影面积", "")
    dw2 = name_dw[dw]
    dw2 = dw2.lower()
    dw2 = dw2.replace("水平投影面积", "")
    if dw1 == dw2:
        return 1
    else:
        answer = callzaidw(dw1, dw2)
        answers = answer.split("\n")
        answers = [x for x in answers if ':' in x and "A" in x]
        answers = [x for x in answers if '=' in x or '<>' in x]
        print(answers)
        if len(answers) == 0:
            return 0
        answer2 = answers[0].split(":")[1].replace(" ", "")
        if "<>" in answer2:
            return 0
        answer2 = answer2.replace("\'", "")
        answer2 = answer2.replace("\"", "")
        answer2 = answer2.replace(",", "")
        answer2 = answer2.split("=")[1]
        if answer2 == "B":
            return 1
        answer2 = answer2.replace("B", "")
        answer2 = answer2.replace("*", "")
        answer2 = answer2.replace("x", "")
        answer2 = answer2.replace("×", "")
        print(answer2)
        return float(answer2)


def clarify(data):
    """Normalize the item's feature text and route special bill codes through extra LLM checks."""
    data['tz'] = data['tz'].replace("水泥基防水涂料", "水泥基渗透结晶防水涂料")
    data['tz'] = data['tz'].replace("无机保温砂浆", "无机轻集料保温砂浆")
    data['tz'] = data['tz'].replace("JS防水涂料", "聚合物水泥防水涂料")
    if data['bianma'].startswith("010301"):  ##打桩 (pile driving)
        print("clarify")
        result = callzaiclarify(data)
        if "是" in result:
            data['mc'] = data['mc'] + '填芯'
        return data, False
    elif data['bianma'].startswith("010507001"):  ##散水、坡道 (apron / ramp)
        data['tz'] = lingji(data['tz'], aiclient, qwclient)
        data['tz'] = dianceng(data['tz'], aiclient, qwclient)
        return data, False
    elif data['bianma'].startswith("0109"):  ##防水 (waterproofing)
        data['tz'] = lingji(data['tz'], aiclient, qwclient)
        data['tz'] = dianceng(data['tz'], aiclient, qwclient)
        return data, False
    elif data['bianma'].startswith("0111"):  ##楼地面 (floor finishes)
        data['tz'] = jieheceng(data['tz'], aiclient, qwclient)
        result = callzaikuailiao(data)
        if '是' in result:
            return data, True
        return data, False
    else:
        return data, False
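
# process_data() pipeline: clarify() normalizes the text and flags special bill codes;
# a BGE bi-encoder retrieves 25 candidate quota entries from ChromaDB and a cross-encoder
# reranks them; a per-category cutoff plus the rule tables (hunningtu/menchuang/nantong/
# basic/incremental) prune mutually exclusive candidates; fallback() and postprocess()
# handle empty or noisy selections; each surviving quota is unit-converted with
# huansuan_highlevel(), optionally combined via zuhe(), enriched through dedata, and the
# result is posted to the local /api/transform service.
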
@celery_app.task
def process_data(data: dict) -> dict:
    placeholder, kuailiao = clarify(data)
    label = data['mc'] + ' ' + data['tz']
    if data['bianma'].startswith("0117"):  # 措施项目 (cuoshi) items use a dedicated encoder, collection and reranker
        label = data['label'] + " " + data['mc'] + " " + data['tz']
    sentences = [label]

    # Retrieve 25 candidate quota entries from the vector store.
    if data['bianma'].startswith("0117"):
        embeddings = cuoshi_model.encode(sentences)
    else:
        embeddings = model.encode(sentences)
    if data['bianma'].startswith("0117"):
        result = cuoshi_collection.query(query_embeddings=embeddings, n_results=25)
    else:
        result = collection.query(query_embeddings=embeddings, n_results=25)
    d = result['documents'][0]
    print(d)

    # Rerank with the cross-encoder and keep the top 10.
    if data['bianma'].startswith("0117"):
        ranks = cuoshi_ce.rank(label, d)
    else:
        ranks = ce.rank(label, d)
    if data['bianma'].startswith("0117"):
        cutoff = 0.6
    else:
        cutoff = THRESHOLD
    ranks = ranks[:10]

    # Mutually exclusive rule-group pairs: match for the concrete (hunningtu) table,
    # match2 for the nantong table, match3 for the door/window (menchuang) table.
    match = [("6.2.1.1", "6.3.1.1"), ("6.2.1.2", "6.3.1.2"), ("6.2.1.3", "6.3.1.3"),
             ("6.2.1.4", "6.3.1.4"), ("6.2.1.5", "6.3.1.5"), ("6.2.1.6", "6.3.1.6"),
             ("6.2.2.1", "6.3.2.1"), ("6.2.2.2", "6.3.2.2"), ("6.2.3.1", "6.3.3.1"),
             ("6.2.3.2", "6.3.3.2"), ("6.2.3.3", "6.3.3.3"), ("6.2.3.4", "6.3.3.4"),
             ("6.2.3.5", "6.3.3.5"), ("6.2.3.6", "6.3.3.6")]
    match2 = [("1.1.7", "nantong1.1.7"), ("nantong2.1.2", "2.1.2"), ("nantong3.1.2", "3.1.2"),
              ("nantong3.1.4", "3.1.4"), ("nantong3.1.5", "3.1.5"), ("3.2.10", "nantong3.2.10"),
              ("nantong4.1.1", "4.1.1"), ("nantong4.1.2", "4.1.2"), ("nantong4.1.3", "4.1.3"),
              ("4.1.4", "nantong4.1.4"), ("4.1.5", "nantong4.1.5"), ("4.1.7", "nantong4.1.7"),
              ("4.4", "nantong4.4"), ("nantong6", "6"), ("7.5", "nantong7.5"),
              ("nantong7.8", "7.8"), ("10.1.5", "nantong10.1.5"), ("10.1.2", "nantong10.1.2"),
              ("10.1.1", "nantong10.1.1"), ("nantong10.1.1.2", "10.1.1.2"), ("10.1.1.3", "nantong10.1.1.3"),
              ("nantong11.1.2.1", "11.1.2.1"), ("nantong11.1.2.2", "11.1.2.2"), ("nantong11.1.1", "11.1.1"),
              ("12.7", "nantong12.7"), ("12.6", "nantong12.6"), ("nantong12.5", "12.5"),
              ("nantong13.1.1", "13.1.1"), ("nantong13.1.2", "13.1.2"), ("nantong13.1.3", "13.1.3"),
              ("nantong13.2.2", "13.2.2"), ("nantong13.3.1", "13.3.1"), ("nantong13.3.2", "13.3.2"),
              ("13.3.3", "nantong13.3.3"), ("13.4.4", "nantong13.4.4"), ("nantong13.5.1", "13.5.1"),
              ("13.5.4", "nantong13.5.4"), ("nantong14.3.8", "14.3.8"), ("14.4.4", "nantong14.4.4"),
              ("14.4.6", "nantong14.4.6"), ("nantong15.3.1", "15.3.1"), ("16.2", "nantong16.2"),
              ("17.1.3.2", "nantong17.1.3.2"), ("17.1.3.3", "nantong17.1.3.3"), ("17.1.3.4", "nantong17.1.3.4"),
              ("18.3.3", "nantong18.3.3"), ("18.3.2", "nantong18.3.2"), ("18.5", "nantong18.5"),
              ("18.6", "nantong18.6"), ("18.15", "nantong18.15"), ("20.1.1", "nantong20.1.1"),
              ("20.1.2.1", "nantong20.1.2.1"), ("20.1.2.3", "nantong20.1.2.3"), ("20.1.2.5", "nantong20.1.2.5"),
              ("21.1.1.1", "nantong21.1.1.1"), ("21.1.1.2", "nantong21.1.1.2"), ("21.1.3.1", "nantong21.1.3.1"),
              ("21.1.3.2", "nantong21.1.3.2"), ("21.1.3.3", "nantong21.1.3.3"), ("21.1.5", "nantong21.1.5"),
              ("21.1.6", "nantong21.1.6"), ("21.1.7", "nantong21.1.7"), ("23.1.2", "nantong23.1.2")]
    match3 = [('16.1', '16.2'), ('16.1.1', '16.2.1')]

    selected = []
    notselected = []

    #if data['bianma'].startswith('0108'):  ##门窗
    #    score = -1
    #    for rank in ranks:
    #        if label_name[d[rank['corpus_id']]].startswith('16-') and int(label_name[d[rank['corpus_id']]].split('-')[1]) < 308:
    #            score = rank['score']
    #            break
    #    if score > -1 and score < cutoff:
    #        cutoff = score - 0.1
    #        if cutoff < 0.3:
    #            cutoff = 0.3

    # Lower the cutoff when a characteristic candidate ranks just below it.
    if data['bianma'].startswith('0106'):  ##金属结构 (metal structures)
        score = -1
        for rank in ranks:
            if label_name[d[rank['corpus_id']]].startswith('7-') and label_name[d[rank['corpus_id']]] != '7-62' and label_name[d[rank['corpus_id']]] != '7-63':
                score = rank['score']
                break
        if score > -1 and score < cutoff:
            cutoff = score - 0.05
            if cutoff < 0.3:
                cutoff = 0.3
    if data['bianma'].startswith('0111'):  ##楼地面 (floor finishes)
        score = -1
        for rank in ranks:
            if '13.4' in d[rank['corpus_id']] and '块料面层' in d[rank['corpus_id']]:
                score = rank['score']
                break
        if score > -1 and score < cutoff:
            cutoff = score - 0.05
            if cutoff < 0.3:
                cutoff = 0.3
    print("cutoff=" + str(cutoff))

    # Incremental companions are never selected directly; they are added later via zuhe().
    for entry in incremental:
        notselected = notselected + incremental[entry]
    notselected = [name_label[x] for x in notselected]

    for rank in ranks:
        print(f"{rank['score']} {d[rank['corpus_id']]}")
        if rank['score'] < cutoff:
            continue
        if d[rank['corpus_id']] in notselected:
            continue
        selected.append(d[rank['corpus_id']])
        # Once a candidate from a rule group is taken, exclude the entries of its paired group.
        hunningtu_group = []
        for entry in obj:
            if d[rank['corpus_id']] in obj[entry]:
                hunningtu_group = [entry]
        if len(hunningtu_group) > 0:
            for entry in match:
                if entry[0] == hunningtu_group[0] or entry[1] == hunningtu_group[0]:
                    notselected = notselected + obj[entry[0]]
                    notselected = notselected + obj[entry[1]]
        menchuang_group = []
        for entry in menchuang:
            if d[rank['corpus_id']] in menchuang[entry]:
                menchuang_group = [entry]
        if len(menchuang_group) > 0:
            for entry in match3:
                if entry[0] == menchuang_group[0] or entry[1] == menchuang_group[0]:
                    notselected = notselected + menchuang[entry[0]]
                    notselected = notselected + menchuang[entry[1]]
        nantong_group = []
        for entry in obj2:
            if d[rank['corpus_id']] in obj2[entry]:
                nantong_group = [entry]
        if len(nantong_group) > 0:
            for entry in match2:
                if entry[0] == nantong_group[0] or entry[1] == nantong_group[0]:
                    notselected = notselected + obj2[entry[0]]
                    notselected = notselected + obj2[entry[1]]
        # Entries that share a basic-rule group with the selected one are excluded as well.
        for entry in basic:
            if d[rank['corpus_id']] in basic[entry]:
                notselected = notselected + basic[entry]
        # Items already selected are removed from the exclusion list.
        notselected = [x for x in notselected if x not in selected]

    selected = list(set(selected))
    if len(selected) == 0:
        candidates = []
        for rank in ranks:
            candidates.append(d[rank['corpus_id']])
        selected = fallback(candidates, data, aiclient, qwclient, menchuang_collection, model)
    selected = postprocess(selected, data, aiclient, qwclient, sfclient, label_name, name_dw)
    print("final selected")
    print(selected)

    # Unit conversion, optional combination with incremental companions, and enrichment.
    result = [(label_name[x], huansuan_highlevel(data['bianma'], x, data['dw'], label_name[x], data['mc'] + data['tz'])) for x in selected]
    print("after huansuan")
    print(result)
    result = [(zuhe(x[0], label), x[1]) for x in result]
    print("after zuhe")
    print(result)
    result = [(x[0], x[1], dedata.read_singledexilie2(10, x[0])) for x in result]

    need = need_extra(data, aiclient, qwclient, result)
    if need:
        extra_info = extra(data, aiclient, qwclient, menchuang_collection, model)
    else:
        extra_info = "无"

    response = requests.post("http://localhost:3000/api/transform",
                             json={'bianma': data['bianma'], 'mc': data['mc'], 'tz': data['tz'],
                                   'dw': data['dw'], 'sl': data['sl'], 'n': data['n'],
                                   "extra": extra_info, 'result': result})
    return {"result": response.json()}
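
# Example invocation (illustrative field values only; a running Celery worker/broker, the
# ChromaDB server and the local /api/transform service are all assumed, and the module
# name "tasks" is a placeholder for wherever this file lives):
#
#     from tasks import process_data
#     payload = {
#         "bianma": "011102003001", "label": "楼地面工程", "mc": "块料楼地面",
#         "tz": "10厚防滑地砖,干水泥擦缝", "dw": "m2", "sl": 120.5, "n": 1,
#     }
#     async_result = process_data.delay(payload)
#     print(async_result.get(timeout=600))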