# Celery task module: matches bill-of-quantities items to quota entries using vector retrieval, cross-encoder re-ranking and LLM helpers.
# Imports are kept in their original relative order; only the two that sat
# between the set-up statements were moved up next to the others.
import time
from fallback import fallback
from dianceng import dianceng
from lingji import lingji
from jieheceng import jieheceng
from celery_app import celery_app
from postprocess import postprocess
from extra import extra, need_extra
from huansuan import callzaihuansuan
import json
import dedata
import chromadb
from FlagEmbedding import FlagModel
from sentence_transformers import CrossEncoder

# Remote Chroma server and the three collections used by this module:
#   collection            - queried by process_data for every item whose code
#                           does not start with "0117"
#   cuoshi_collection     - queried by process_data for "0117" items
#   menchuang_collection  - handed to fallback() and extra()
client = chromadb.HttpClient(host='47.101.198.30', port=8000)
collection = client.get_or_create_collection(name="tj_de_bge")
cuoshi_collection = client.get_or_create_collection(name="tj_cuoshi_bge")
menchuang_collection = client.get_or_create_collection(name="tj_menchuang_bge")

# Sentence encoders that produce the query embeddings (general / "0117").
model = FlagModel('/mnt/d/Develop/bge/test2_encoder_only_base_bge-large-zh-v1.5')
cuoshi_model = FlagModel('cuoshi_encoder_only_base_bge-large-zh-v1.5/cuoshi_encoder_only_base_bge-large-zh-v1.5')

# Cross-encoders that re-rank the retrieved documents (general / "0117").
ce = CrossEncoder('/mnt/d/Develop/celery/final')
cuoshi_ce = CrossEncoder('cuoshi_reranker/final')
def _load_rule(path):
    """Return the JSON document stored in the file at *path*.

    The file is decoded as UTF-8 explicitly so that loading the rule tables
    does not depend on the locale of the machine running the worker.
    """
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# Rule tables, loaded once at import time.  The shapes noted below are the
# ones the code in this module relies on.
obj = _load_rule("hunningtu_rule")          # group name -> list of documents
obj2 = _load_rule("nantong_rule")           # group name -> list of documents
basic = _load_rule("basic_rule")            # group name -> list of documents
menchuang = _load_rule("menchuang_rule")    # group name -> list of documents
incremental = _load_rule("incremental_rule")  # quota name -> list of add-on quota names
label_name = _load_rule("label_name")       # document text -> quota name
name_label = _load_rule("name_label")       # quota name -> document text
name_dw = _load_rule("name_dw")             # quota name -> unit string

# Default re-ranker score below which a candidate is ignored (tunable).
THRESHOLD = 0.8
import os
from openai import OpenAI
import requests


def _chat_client(key_env, base_url):
    """Build an OpenAI-compatible client whose API key is read from *key_env*."""
    return OpenAI(api_key=os.getenv(key_env), base_url=base_url)


# One client per OpenAI-compatible endpoint; each key comes from its own
# environment variable.  hyclient and bdclient are not referenced by the
# functions visible in this part of the file.
aiclient = _chat_client("ZAI_API_KEY", "https://open.bigmodel.cn/api/paas/v4/")
qwclient = _chat_client("MS_API_KEY", "https://api-inference.modelscope.cn/v1/")
hyclient = _chat_client("HY_API_KEY", "https://api.hunyuan.cloud.tencent.com/v1")
bdclient = _chat_client("BD_API_KEY", "https://qianfan.baidubce.com/v2")
sfclient = _chat_client("SF_API_KEY", "https://api.siliconflow.cn/v1")
def callzaikuailiao(data):
    """Ask the LLMs whether a floor work item includes block-material surfacing.

    Two chat completions are chained: ``aiclient`` (glm-4.5-flash) answers the
    question in free text, then ``sfclient`` (THUDM/GLM-4-9B-0414) is asked to
    reduce that answer to a yes/no verdict in JSON form.  Each request is
    preceded by a one-second pause (presumably rate limiting - confirm).

    Args:
        data: mapping with the string fields ``label``, ``mc``, ``tz``, ``dw``.

    Returns:
        The raw text of the second model's reply.
    """
    time.sleep(1)
    question = (
        " 楼地面有多种做法,包括整体面层,块料面层,木地板等。块料面层常见的有石材块料面板,缸砖,马赛克,假麻石块,地砖,橡胶塑料板等。 现在给定一工作内容如下: "
        + data['label'] + " " + data['mc'] + " " + data['tz']
        + ", 计量单位为" + data['dw']
        + ", 请问该工作内容中包括了块料面层施工吗?"
    )
    first_reply = aiclient.chat.completions.create(
        model="glm-4.5-flash",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ],
        # Provider-specific request field asking the model not to "think".
        extra_body={"thinking": {"type": "disabled"}},
    )
    reasoning = first_reply.choices[0].message.content
    print(reasoning)

    time.sleep(1)
    verdict_request = (
        " 给你一段文字如下, " + reasoning
        + ",其中给出了关于是否包括块料面层施工的判断,请将该判断输出。请输出是或者否"
    )
    second_reply = sfclient.chat.completions.create(
        model="THUDM/GLM-4-9B-0414",
        messages=[
            {"role": "system", "content": "You are a helpful assistant.请将最终答案以JSON格式输出"},
            {"role": "user", "content": verdict_request},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    verdict = second_reply.choices[0].message.content
    print(verdict)
    return verdict
def callzaiclarify(data):
    """Ask the LLMs whether a pipe-pile work item describes core filling.

    ``aiclient`` (glm-4.5-flash) answers the question in free text, then
    ``sfclient`` (THUDM/GLM-4-9B-0414) is asked to reduce that answer to a
    yes/no verdict in JSON form.

    Args:
        data: mapping with the string fields ``label``, ``mc``, ``tz``, ``dw``.

    Returns:
        The raw text of the second model's reply.
    """
    question = (
        " 管桩的清单工作内容描述可以分成两类,一类是打桩、压桩,属于桩基工程的一种,其工作内容计量的单位一般是米(m)或根。另一类是填芯,一般是向桩芯内浇混凝土,属于土建工程的一种,其配套的计量单位一般是立方米(m3),即浇混凝土的体积量。现在给定一工作内容如下: "
        + data['label'] + " " + data['mc'] + " " + data['tz']
        + ", 计量单位为" + data['dw']
        + ", 请问该工作内容属于填芯吗?"
    )
    first_reply = aiclient.chat.completions.create(
        model="glm-4.5-flash",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ],
        # Provider-specific request field asking the model not to "think".
        extra_body={"thinking": {"type": "disabled"}},
    )
    reasoning = first_reply.choices[0].message.content
    print(reasoning)

    verdict_request = (
        " 给你一段文字如下, " + reasoning
        + ",其中给出了关于是不是填芯的判断,请将该判断输出。请输出是或者否"
    )
    second_reply = sfclient.chat.completions.create(
        model="THUDM/GLM-4-9B-0414",
        messages=[
            {"role": "system", "content": "You are a helpful assistant.请将最终答案以JSON格式输出"},
            {"role": "user", "content": verdict_request},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    verdict = second_reply.choices[0].message.content
    print(verdict)
    return verdict
def callzaidw(A,B):
    """Ask the LLM how measurement unit ``A`` relates to measurement unit ``B``.

    Both requests go to ``qwclient`` (ZhipuAI/GLM-4.5): the first asks for a
    relation of the form ``A=B``, ``A=k*B`` or ``A<>B``; the second asks the
    model to restate the final answer in JSON form.  Each request is preceded
    by a one-second pause (presumably rate limiting - confirm).

    Args:
        A: first unit, as text.
        B: second unit, as text.

    Returns:
        The raw text of the second reply.
    """
    time.sleep(1)
    question = (
        " 计量单位可以用名称或者符号表示,常用的符号包括表示米的符号m,表示千米的符号km,表示吨的符号t,表示千克的符号kg,表示平方米的符号m2,表示立方米的符号m3。也有计量单位很宽泛,比如“项”、“次”. 给定一个工作量计量单位,内容为"
        + A
        + ",记作A,再给定一个工作量计量单位,内容为"
        + B
        + ",记作B。若两个单位相等,请返回A=B。例如,“项”跟“次”是等价的,应返回A=B。若两个单位不相等,但是存在比例换算关系,请返回比例换算关系,例如A单位是m,B单位是10m, 则返回A=0.1*B。再例如,A单位是10m2,B单位是m2,则返回A=10*B。再例如,A单位是m3, B单位是1000m3,则返回A=0.001*B。若两个单位不相等,且不存在比例换算关系,请返回A<>B,例如A单位是m,B单位是m2,一个表示长度,一个表示面积,不存在比例关系,则返回A<>B。 "
    )
    first_reply = qwclient.chat.completions.create(
        model="ZhipuAI/GLM-4.5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ],
        # Provider-specific request field asking the model not to "think".
        extra_body={"thinking": {"type": "disabled"}},
    )
    reasoning = first_reply.choices[0].message.content
    print(reasoning)

    time.sleep(1)
    extraction_request = (
        " 给你一段文字如下, " + reasoning
        + ",其中给出了一个类似于A=B的表达式作为答案,请将该最终答案输出"
    )
    second_reply = qwclient.chat.completions.create(
        model="ZhipuAI/GLM-4.5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant.请将最终答案以JSON格式输出"},
            {"role": "user", "content": extraction_request},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    relation = second_reply.choices[0].message.content
    print(relation)
    return relation
def callzai(A,B,C):
    """Ask the LLM how to combine a main quota and an add-on quota for an item.

    ``aiclient`` (glm-4.5-flash) is asked for an expression such as ``A``,
    ``A+B`` or ``A+B*2``.  A reply shorter than four characters is returned
    as is; a longer reply is sent back to the same model with a request to
    restate the final expression in JSON form.

    Args:
        A: text of the main quota.
        B: text of the add-on quota.
        C: text of the bill-of-quantities item.

    Returns:
        The short first reply, or the raw text of the second reply.
    """
    question = (
        " 给定一条主定额,内容为"
        + A
        + ",记作A,再给定一条配套定额,内容为"
        + B
        + ",记作B。可以通过简单的组合,来表达对应的工作内容的数量,比如A+B可以表达,主定额的工作量加上配套定额的工作量;再比如,A+B*2可以表达, 主定额的工作量加上两倍的配套定额的工作量;再比如,A+B*(-2)可以表达, 主定额减去两倍的配套定额的工作量;再比如,A可以表示,不使用配套定额,仅表示主定额的工作量。现在给你一条工程量清单,内容为"
        + C
        + ",该条清单包含了主定额描述的工作内容,但是数量并不一定一致。请你组合A与B,表示出清单描述的对应工作数量。请输出类似A+B、A+B*2、A-B*2的格式,不要输出A+2*B、A-2*B的格式。如果清单里相应工作量的描述不明确,请输出A作为答案 "
    )
    first_reply = aiclient.chat.completions.create(
        model="glm-4.5-flash",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ],
        # Provider-specific request field asking the model not to "think".
        extra_body={"thinking": {"type": "disabled"}},
    )
    reasoning = first_reply.choices[0].message.content
    print(reasoning)
    if len(reasoning) < 4:
        # Already a bare expression such as "A"; no extraction round needed.
        return reasoning

    extraction_request = (
        " 给你一段文字如下, " + reasoning
        + ",其中给出了一个类似于A+B的表达式作为答案,请将该最终答案输出"
    )
    second_reply = aiclient.chat.completions.create(
        model="glm-4.5-flash",
        messages=[
            {"role": "system", "content": "You are a helpful assistant.请将最终答案以JSON格式输出"},
            {"role": "user", "content": extraction_request},
        ],
        extra_body={"thinking": {"type": "disabled"}},
    )
    return second_reply.choices[0].message.content
def transform(answer, input, entry):
    """Turn an LLM reply containing an ``A``/``B`` expression into a quota expression.

    The expression is taken from the first line of *answer* that looks like
    ``key: value`` and mentions ``A`` (the text between the first and second
    colon is used).  If no such line exists but the whole reply is itself a
    bare expression (only ``A``, ``B``, digits and ``+-*().``), that is used
    instead - ``callzai`` returns short replies such as ``A+B`` verbatim.
    ``A`` is then replaced by *input* and ``B`` by ``[entry]``.

    Args:
        answer: raw reply text, possibly multi-line JSON.
        input: name of the main quota; substituted for ``A``.
        entry: name of the add-on quota; substituted for ``B`` in brackets.

    Returns:
        The substituted expression with spaces and quotes removed, or *input*
        unchanged when no expression can be found.
    """
    keyed = [line for line in answer.split("\n") if ':' in line and "A" in line]
    if keyed:
        expr = keyed[0].split(":")[1].replace(" ", "")
    else:
        bare = "".join(answer.split()).replace("'", "").replace('"', "").strip("`")
        if "A" not in bare or not set(bare) <= set("AB+-*().0123456789"):
            return input
        expr = bare

    # A single-line JSON reply leaves "}" or "," around the value; they are
    # never part of the expression.
    expr = expr.strip("{},\r\t")

    # Substitute both placeholders in one pass so that a "B" occurring inside
    # *input* is not rewritten by the second substitution.
    replacements = {"A": input, "B": "[" + entry + "]"}
    combined = "".join(replacements.get(ch, ch) for ch in expr)
    return combined.replace("\'", "").replace("\"", "")
def zuhe(input, content):
    """Combine a quota with one of its add-on quotas when the item calls for it.

    For every add-on listed for *input* in ``incremental`` the LLM is asked
    (via ``callzai``) how the two quotas combine for the item text; the first
    reply that mentions ``B`` is converted with ``transform`` and returned.

    Args:
        input: quota name (a key of ``name_label`` and possibly ``incremental``).
        content: text of the bill-of-quantities item.

    Returns:
        The combined expression, or *input* unchanged when the quota has no
        add-ons or no reply uses the add-on.
    """
    if input not in incremental:
        return input

    main_text = name_label[input]
    for addon in incremental[input]:
        reply = callzai(main_text, name_label[addon], content)
        print("answer of callai for zuhe")
        print(reply)
        if 'B' in reply:
            return transform(reply, input, addon)
    return input
def _normalize_unit(unit):
    """Lower-case a unit string and drop the '水平投影面积' qualifier."""
    return unit.lower().replace("水平投影面积", "")


def _parse_ratio_answer(answer):
    """Extract the factor ``k`` from an LLM reply stating ``A=k*B``.

    The first line that looks like ``key: value``, mentions ``A`` and
    contains ``=`` or ``<>`` is used.

    Returns:
        1 for ``A=B``, ``float(k)`` for ``A=k*B``, and 0 when the reply says
        ``A<>B``, contains no usable line, or the factor cannot be read as a
        number (0 is the value callers already treat as "no conversion").
    """
    candidates = [
        line for line in answer.split("\n")
        if ':' in line and "A" in line and ('=' in line or '<>' in line)
    ]
    print(candidates)
    if not candidates:
        return 0
    expr = candidates[0].split(":")[1].replace(" ", "")
    if "<>" in expr:
        return 0
    # Quotes, ASCII / full-width commas and JSON braces are noise here.
    for noise in ("\'", "\"", ",", "\uff0c", "{", "}"):
        expr = expr.replace(noise, "")
    sides = expr.split("=")
    if len(sides) < 2:
        return 0
    factor = sides[1]
    if factor == "B":
        return 1
    for token in ("B", "*", "x", "×"):
        factor = factor.replace(token, "")
    print(factor)
    try:
        return float(factor)
    except ValueError:
        # e.g. "B/10" or trailing prose: report "no conversion" instead of
        # failing the whole task.
        print("unparsable unit ratio: " + factor)
        return 0


def huansuan_highlevel(bianma, label, input,dw, tz):
    """Return the factor converting the item's unit into a quota's unit.

    ``huansuan`` is tried first; only when it reports 0 is the richer
    ``callzaihuansuan`` prompt (item code, quota text and item description)
    consulted.  The call starts with a one-second pause.

    Args:
        bianma: item code.
        label: quota document text.
        input: unit string of the item.
        dw: key into ``name_dw`` giving the quota's unit.
        tz: item description text passed on to ``callzaihuansuan``.

    Returns:
        The conversion factor, or 0 when no conversion could be established.
    """
    time.sleep(1)
    t = huansuan(input, dw)
    if t != 0:
        # Quotas mentioning '人工修边坡' get the factor scaled by 0.1
        # (domain rule; the reason is not visible in this file).
        return t * 0.1 if '人工修边坡' in label else t

    dw1 = _normalize_unit(input)
    dw2 = _normalize_unit(name_dw[dw])
    answer = callzaihuansuan(bianma, label, dw1,dw2,tz, aiclient, qwclient)
    print("answer of callzaihuansuan")
    return _parse_ratio_answer(answer)


def huansuan(input, dw):
    """Return the factor converting unit *input* into the unit ``name_dw[dw]``.

    Identical units (after normalisation) give 1 without any LLM call;
    otherwise ``callzaidw`` is asked and its reply parsed.

    Returns:
        The conversion factor, or 0 when the units are unrelated or the
        reply cannot be interpreted.
    """
    dw1 = _normalize_unit(input)
    dw2 = _normalize_unit(name_dw[dw])
    if dw1 == dw2:
        return 1
    return _parse_ratio_answer(callzaidw(dw1,dw2))
def _says_yes(reply):
    """Tell whether an LLM yes/no reply is affirmative.

    A plain ``"是" in reply`` test also fires on the question words
    ``是否`` / ``是不是`` (common as JSON keys) and on the negation ``不是``,
    so those are removed before looking for ``是``.  JSON-style ``true`` and
    ``yes`` are accepted as well.
    """
    text = reply
    for phrase in ("是不是", "是否", "不是"):
        text = text.replace(phrase, "")
    lowered = text.lower()
    return "是" in text or "true" in lowered or "yes" in lowered


def clarify(data):
    """Normalise an item in place before retrieval, using LLM helpers per code prefix.

    Three material names in ``data['tz']`` are always rewritten to their
    longer forms.  Then, depending on the prefix of ``data['bianma']``:

    * ``010301``: ``callzaiclarify`` is consulted and '填芯' is appended to
      ``data['mc']`` when it answers yes;
    * ``010507001`` / ``0109``: ``data['tz']`` is passed through ``lingji``
      and then ``dianceng``;
    * ``0111``: ``data['tz']`` is passed through ``jieheceng`` and
      ``callzaikuailiao`` decides the returned flag.

    Args:
        data: item mapping with at least ``bianma``, ``mc`` and ``tz``
            (the LLM helpers also read ``label`` and ``dw``); modified in place.

    Returns:
        ``(data, flag)`` where *flag* is True only for a ``0111`` item that
        ``callzaikuailiao`` judged affirmatively.
    """
    data['tz'] = data['tz'].replace("水泥基防水涂料", "水泥基渗透结晶防水涂料")
    data['tz'] = data['tz'].replace("无机保温砂浆", "无机轻集料保温砂浆")
    data['tz'] = data['tz'].replace("JS防水涂料", "聚合物水泥防水涂料")

    bianma = data['bianma']
    if bianma.startswith("010301"):
        print("clarify")
        if _says_yes(callzaiclarify(data)):
            data['mc'] = data['mc'] + '填芯'
        return data, False
    if bianma.startswith(("010507001", "0109")):
        data['tz'] = lingji(data['tz'], aiclient, qwclient)
        data['tz'] = dianceng(data['tz'], aiclient, qwclient)
        return data, False
    if bianma.startswith("0111"):
        data['tz'] = jieheceng(data['tz'], aiclient, qwclient)
        return data, _says_yes(callzaikuailiao(data))
    return data, False
# Group names (keys of ``obj``) that exclude each other: once a document from
# either side of a pair is selected, the documents of both groups are barred.
_HUNNINGTU_PAIRS = [
    ("6.2.1.1", "6.3.1.1"),
    ("6.2.1.2", "6.3.1.2"),
    ("6.2.1.3", "6.3.1.3"),
    ("6.2.1.4", "6.3.1.4"),
    ("6.2.1.5", "6.3.1.5"),
    ("6.2.1.6", "6.3.1.6"),
    ("6.2.2.1", "6.3.2.1"),
    ("6.2.2.2", "6.3.2.2"),
    ("6.2.3.1", "6.3.3.1"),
    ("6.2.3.2", "6.3.3.2"),
    ("6.2.3.3", "6.3.3.3"),
    ("6.2.3.4", "6.3.3.4"),
    ("6.2.3.5", "6.3.3.5"),
    ("6.2.3.6", "6.3.3.6"),
]

# Same idea for the keys of ``obj2``.
_NANTONG_PAIRS = [
    ("1.1.7", "nantong1.1.7"),
    ("nantong2.1.2", "2.1.2"),
    ("nantong3.1.2", "3.1.2"),
    ("nantong3.1.4", "3.1.4"),
    ("nantong3.1.5", "3.1.5"),
    ("3.2.10", "nantong3.2.10"),
    ("nantong4.1.1", "4.1.1"),
    ("nantong4.1.2", "4.1.2"),
    ("nantong4.1.3", "4.1.3"),
    ("4.1.4", "nantong4.1.4"),
    ("4.1.5", "nantong4.1.5"),
    ("4.1.7", "nantong4.1.7"),
    ("4.4", "nantong4.4"),
    ("nantong6", "6"),
    ("7.5", "nantong7.5"),
    ("nantong7.8", "7.8"),
    ("10.1.5", "nantong10.1.5"),
    ("10.1.2", "nantong10.1.2"),
    ("10.1.1", "nantong10.1.1"),
    ("nantong10.1.1.2", "10.1.1.2"),
    ("10.1.1.3", "nantong10.1.1.3"),
    ("nantong11.1.2.1", "11.1.2.1"),
    ("nantong11.1.2.2", "11.1.2.2"),
    ("nantong11.1.1", "11.1.1"),
    ("12.7", "nantong12.7"),
    ("12.6", "nantong12.6"),
    ("nantong12.5", "12.5"),
    ("nantong13.1.1", "13.1.1"),
    ("nantong13.1.2", "13.1.2"),
    ("nantong13.1.3", "13.1.3"),
    ("nantong13.2.2", "13.2.2"),
    ("nantong13.3.1", "13.3.1"),
    ("nantong13.3.2", "13.3.2"),
    ("13.3.3", "nantong13.3.3"),
    ("13.4.4", "nantong13.4.4"),
    ("nantong13.5.1", "13.5.1"),
    ("13.5.4", "nantong13.5.4"),
    ("nantong14.3.8", "14.3.8"),
    ("14.4.4", "nantong14.4.4"),
    ("14.4.6", "nantong14.4.6"),
    ("nantong15.3.1", "15.3.1"),
    ("16.2", "nantong16.2"),
    ("17.1.3.2", "nantong17.1.3.2"),
    ("17.1.3.3", "nantong17.1.3.3"),
    ("17.1.3.4", "nantong17.1.3.4"),
    ("18.3.3", "nantong18.3.3"),
    ("18.3.2", "nantong18.3.2"),
    ("18.5", "nantong18.5"),
    ("18.6", "nantong18.6"),
    ("18.15", "nantong18.15"),
    ("20.1.1", "nantong20.1.1"),
    ("20.1.2.1", "nantong20.1.2.1"),
    ("20.1.2.3", "nantong20.1.2.3"),
    ("20.1.2.5", "nantong20.1.2.5"),
    ("21.1.1.1", "nantong21.1.1.1"),
    ("21.1.1.2", "nantong21.1.1.2"),
    ("21.1.3.1", "nantong21.1.3.1"),
    ("21.1.3.2", "nantong21.1.3.2"),
    ("21.1.3.3", "nantong21.1.3.3"),
    ("21.1.5", "nantong21.1.5"),
    ("21.1.6", "nantong21.1.6"),
    ("21.1.7", "nantong21.1.7"),
    ("23.1.2", "nantong23.1.2"),
]

# Same idea for the keys of ``menchuang``.
_MENCHUANG_PAIRS = [('16.1', '16.2'), ('16.1.1', '16.2.1')]

# The one add-on document that is not simply skipped when retrieved: it is
# swapped for the base document below.
_ADDON_DOC_TO_SWAP = '盐城补充定额 盐城补充定额2018 平面立面及其它防水 涂刷油类 水泥基渗透结晶防水每增减0.5mm厚'
_SWAPPED_BASE_DOC = '第十章 屋面及防水工程 10.2 平面立面及其它防水 10.2.1 涂刷油类 水泥基渗透结晶 防水材料 二~三遍(厚2mm)'


def _relax_cutoff(ranks, docs, cutoff, is_anchor):
    """Lower *cutoff* so that the first anchor document in *ranks* passes it.

    The first entry of *ranks* whose document satisfies *is_anchor* is
    located (assumes *ranks* is ordered best-first, as returned by the
    re-ranker - confirm).  If its score lies between -1 and *cutoff*, the
    cut-off becomes that score minus 0.05, but never less than 0.3.
    Otherwise *cutoff* is returned unchanged.
    """
    for rank in ranks:
        if is_anchor(docs[rank['corpus_id']]):
            score = rank['score']
            break
    else:
        return cutoff
    if score > -1 and score < cutoff:
        cutoff = score - 0.05
        if cutoff < 0.3:
            cutoff = 0.3
    return cutoff


def _conflicting_docs(doc, groups, pairs):
    """List the documents barred by selecting *doc*, according to *pairs*.

    *groups* maps a group name to its documents.  The last group containing
    *doc* is taken as its home group; for every pair naming that group the
    documents of both groups of the pair are returned.
    """
    home = None
    for name in groups:
        if doc in groups[name]:
            home = name
    if home is None:
        return []
    barred = []
    for left, right in pairs:
        if left == home or right == home:
            barred = barred + groups[left] + groups[right]
    return barred


def _select_documents(ranks, docs, cutoff):
    """Pick the ranked documents that pass *cutoff* and no exclusion rule.

    Add-on documents (everything listed in ``incremental``) start out barred.
    Each selected document additionally bars the documents of conflicting
    groups (``obj``, ``menchuang``, ``obj2`` pairings and its ``basic``
    group).  *docs* is modified in place when the special add-on document is
    swapped for its base document.

    Returns:
        The selected documents, de-duplicated, in rank order.
    """
    notselected = [
        name_label[name] for key in incremental for name in incremental[key]
    ]
    selected = []
    for rank in ranks:
        idx = rank['corpus_id']
        print(f"{rank['score']} {docs[idx]}")
        if rank['score'] < cutoff:
            continue
        if docs[idx] in notselected:
            if docs[idx] != _ADDON_DOC_TO_SWAP:
                continue
            docs[idx] = _SWAPPED_BASE_DOC
            if docs[idx] in notselected:
                continue
        doc = docs[idx]
        print(f"select {rank['score']} {doc}")
        selected.append(doc)

        notselected += _conflicting_docs(doc, obj, _HUNNINGTU_PAIRS)
        notselected += _conflicting_docs(doc, menchuang, _MENCHUANG_PAIRS)
        notselected += _conflicting_docs(doc, obj2, _NANTONG_PAIRS)
        for group in basic:
            if doc in basic[group]:
                notselected += basic[group]
        # A group list contains the selected document itself; keep what is
        # already selected out of the barred list.
        notselected = [x for x in notselected if x not in selected]

    # dict.fromkeys de-duplicates while keeping rank order, so the result is
    # the same on every run (set() ordering depends on string hashing).
    return list(dict.fromkeys(selected))


@celery_app.task
def process_data(data:dict)-> dict:
    """Match one bill-of-quantities item to quota entries and post the result.

    Steps: normalise the item (``clarify``), embed its text, fetch 25
    candidates from Chroma, re-rank them and keep the top 10, select the
    candidates above the cut-off that no exclusion rule bars (``fallback``
    is used when none survives), run ``postprocess``, compute a unit
    conversion factor and an add-on combination for every selected quota,
    optionally gather extra information, and POST everything to the local
    transform service.

    Args:
        data: item mapping with the keys ``bianma``, ``mc``, ``tz``, ``dw``,
            ``sl``, ``n`` and ``label``; modified in place by ``clarify``.

    Returns:
        ``{"result": <decoded JSON body of the transform service response>}``.
    """
    # NOTE(review): clarify() also returns a flag saying whether a "0111"
    # item includes block-material surfacing; it is not used anywhere below.
    clarify(data)

    bianma = data['bianma']
    if bianma.startswith("0117"):
        label = data['label'] + " " + data['mc'] + " " + data['tz']
        encoder, store, reranker, cutoff = cuoshi_model, cuoshi_collection, cuoshi_ce, 0.6
    else:
        label = data['mc'] + ' ' + data['tz']
        encoder, store, reranker, cutoff = model, collection, ce, THRESHOLD

    embeddings = encoder.encode([label])
    result = store.query(query_embeddings=embeddings,n_results=25)
    d = result['documents'][0]
    print(d)
    ranks = reranker.rank(label, d)[:10]

    if bianma.startswith('0106'):
        # Anchor: any quota named "7-..." except 7-62 and 7-63.
        cutoff = _relax_cutoff(
            ranks, d, cutoff,
            lambda doc: label_name[doc].startswith('7-')
            and label_name[doc] != '7-62'
            and label_name[doc] != '7-63',
        )
    if bianma.startswith('0111'):
        # Anchor: a document mentioning both '13.4' and '块料面层'.
        cutoff = _relax_cutoff(
            ranks, d, cutoff,
            lambda doc: '13.4' in doc and '块料面层' in doc,
        )
    print("cutoff=" + str(cutoff))

    selected = _select_documents(ranks, d, cutoff)
    if len(selected) == 0:
        candidates = [d[rank['corpus_id']] for rank in ranks]
        selected = fallback(candidates, data, aiclient, qwclient, menchuang_collection, model)
    selected = postprocess(selected, data, aiclient, qwclient,sfclient, label_name, name_dw)
    print("final selected")
    print(selected)

    result = [(label_name[x], huansuan_highlevel(data['bianma'], x, data['dw'], label_name[x], data['mc']+data['tz'])) for x in selected]
    print("after haunsuan")
    print(result)
    result = [(zuhe(x[0], label), x[1]) for x in result]
    print("after zuhe")
    print(result)
    result = [(x[0], x[1], dedata.read_singledexilie2(10, x[0])) for x in result]

    if need_extra(data, aiclient, qwclient, result):
        extra_info = extra(data, aiclient, qwclient, menchuang_collection, model)
    else:
        extra_info = "无"

    # NOTE(review): no timeout is set on this request.
    response = requests.post("http://localhost:3000/api/transform", json={'bianma': data['bianma'], 'mc': data['mc'], 'tz': data['tz'], 'dw': data['dw'], 'sl': data['sl'], 'n': data['n'], "extra": extra_info, 'result': result})
    return {"result": response.json()}