| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- import pandas
- import json
- import numpy as np
- import asyncio
- from subdir import db as dbservice
- from subdir import service
- import os
- import hashlib
- from sentence_transformers import SentenceTransformer
- model = SentenceTransformer("BAAI/bge-small-zh-v1.5")
- from pymongo import MongoClient
- client = MongoClient()
- client2 = MongoClient()
def func1(input, content):
    """Return the numeric specialty code resolved for one quota entry.

    ``input`` is the raw code string; it is passed through ``clean`` before
    the lookup. ``content`` is the entry name, forwarded as the ``mc``
    argument of ``getSingleDeXilie_``.

    Only the last element of the 8-item lookup result (the specialty code)
    is returned; the seven series values are discarded.
    """
    normalized_code = clean(input)
    # The literal 10 fills the ``zhuanye`` parameter of the lookup.
    lookup_result = getSingleDeXilie_(model, client2, 10, normalized_code, content)
    _, _, _, _, _, _, _, zhuanye = lookup_result
    return zhuanye
# Result returned whenever no usable specialty can be resolved: seven empty
# series slots followed by the specialty code 0.
_SERIES_NOT_FOUND = (None, None, None, None, None, None, None, 0)

# Maps the stored ``zhuanye`` value to the name of the loader on ``service``
# and to the numeric specialty code returned alongside the series.
_ZHUANYE_DISPATCH = {
    '土建': ('getSingleDeXilie_tj', 10),
    '市政': ('getSingleDeXilie_sz', 20),
    '安装': ('getSingleDeXilie_az', 30),
    '园林': ('getSingleDeXilie_yl', 40),
    '修缮工程土建': ('getSingleDeXilie_xstj', 50),
    '修缮工程安装': ('getSingleDeXilie_xsaz', 60),
}


def _load_series_for(zhuanye_name, debh):
    """Call the ``service`` loader registered for ``zhuanye_name``."""
    entry = _ZHUANYE_DISPATCH.get(zhuanye_name)
    if entry is None:
        # Unknown specialty: keep the 8-tuple shape callers unpack instead of
        # implicitly returning None.
        return _SERIES_NOT_FOUND
    loader_name, code = entry
    # The loader is resolved lazily so that only the one actually needed is
    # looked up on ``service``.
    A1, A2, A3, A4, A5, A6, A7 = getattr(service, loader_name)(debh)
    return A1, A2, A3, A4, A5, A6, A7, code


def getSingleDeXilie_(model, client, zhuanye, debh, mc):
    """Resolve the specialty of a quota code and load its series.

    The code is normalized with ``clean`` and looked up by ``DEBH`` in the
    ``de-collection`` collection of the ``dinge`` database.

    * No match: the "not found" tuple is returned.
    * One match: its ``zhuanye`` value selects the loader.
    * Several matches: the last record whose ``GCLMC`` is a substring of
      ``mc`` wins. If none is, the record whose ``"<parent> <GCLMC>"`` text
      is most similar to ``mc`` under ``model`` is used.

    Args:
        model: object providing ``encode`` and ``similarity``; used only for
            the similarity fallback.
        client: mapping-style database client (``client["dinge"]``).
        zhuanye: unused; kept for interface compatibility.
        debh: raw quota code string.
        mc: entry name used to disambiguate multiple matches.

    Returns:
        An 8-tuple: seven values from the ``service`` loader followed by the
        numeric specialty code (10-60). When there is no match, or the stored
        specialty is not recognised, it is seven ``None`` values followed by 0.
    """
    collection = client["dinge"]["de-collection"]
    debh = clean(debh)

    posts = list(collection.find({'DEBH': debh}))
    if not posts:
        return _SERIES_NOT_FOUND

    # All three fields are read for every record up front, so a malformed
    # record fails immediately regardless of which branch is taken below.
    specialties = [post['zhuanye'] for post in posts]
    names = [post['GCLMC'] for post in posts]
    parents = [post['parent'] for post in posts]

    if len(posts) == 1:
        return _load_series_for(specialties[0], debh)

    # Several specialties share this code: prefer a literal name match. The
    # last matching record wins.
    matched = [i for i, name in enumerate(names) if name in mc]
    if matched:
        return _load_series_for(specialties[matched[-1]], debh)

    # No literal match: rank the candidates by embedding similarity to ``mc``.
    candidates = [parent + ' ' + name for parent, name in zip(parents, names)]
    embeddings = model.encode(candidates + [mc])
    similarities = model.similarity(embeddings[-1], embeddings)
    # The final column compares ``mc`` with itself, so it is dropped.
    scores = similarities[0].numpy()[:-1]
    best = np.argmax(scores).item()
    return _load_series_for(specialties[best], debh)


def clean(debh):
    """Strip trailing annotations from a quota code string.

    Applied in order: everything from the first "附注" onward is removed,
    then everything from the first "*" onward, then a single trailing "换",
    and finally everything from the first "[" onward together with the one
    character preceding it (presumably a separator - TODO confirm).
    """
    for marker in ("附注", "*"):
        position = debh.find(marker)
        if position != -1:
            debh = debh[:position]
    if debh.endswith("换"):
        debh = debh[:-1]
    position = debh.find("[")
    if position != -1:
        # Clamp at 0: with "[" at index 0 a bare ``position - 1`` would turn
        # into the slice ``[:-1]`` and keep the bracket.
        debh = debh[:max(position - 1, 0)]
    return debh
def getLabel(client, zhuanye, debh):
    """Return the "<parent> <GCLMC>" label for a code within one specialty.

    ``zhuanye`` is the numeric specialty code (10-60). It is translated to
    the stored ``zhuanye`` string and the first record of ``de-collection``
    (database ``dinge``) matching both ``DEBH`` and that string supplies the
    label. An empty string is returned for code 0, for an unrecognised code,
    or when no record matches.
    """
    collection = client["dinge"]["de-collection"]
    if zhuanye == 0:
        return ""

    code_names = (
        (10, '土建'),
        (20, '市政'),
        (30, '安装'),
        (40, '园林'),
        (50, '修缮工程土建'),
        (60, '修缮工程安装'),
    )
    for code, specialty in code_names:
        if zhuanye == code:
            query = {'DEBH': debh, 'zhuanye': specialty}
            # Only the first matching record is used.
            for post in collection.find(query):
                return post['parent'] + " " + post['GCLMC']
    return ""
# Export script: for every child entry of the selected ``qdxm`` documents,
# resolve its specialty code and label and write one CSV row to validate1.csv.
db = client["baojia"]
collection = db["qdxm"]
dfs = []
for post in collection.find({'biao_id': "68f1f8fbc93ecc425087f4e8"}):
    print(post)

    for entry in post["__children"]:
        zhuanye = func1(entry["清单编码"], entry["名称"])
        label = getLabel(client2, zhuanye, clean(entry["清单编码"]))
        # The first three columns come from the parent document, the next two
        # from the child entry. List values make this a one-row frame; the
        # scalar values are broadcast to that row.
        item = {
            '名称': [post["名称"]],
            "项目特征": [post["项目特征"]],
            "清单编码": [post["清单编码"]],
            "定额编码": [entry["清单编码"]],
            "定额名称": [entry["名称"]],
            "label": label,
            "zhuanye": zhuanye,
        }
        df = pandas.DataFrame.from_dict(item)
        dfs.append(df)
if dfs:
    data = pandas.concat(dfs)
else:
    # pandas.concat raises ValueError on an empty list; emit a header-only
    # file instead of crashing when the query matched nothing.
    data = pandas.DataFrame(
        columns=['名称', "项目特征", "清单编码", "定额编码", "定额名称",
                 "label", "zhuanye"]
    )
data.to_csv("validate1.csv")
-
-
-
-
|