5 months ago · 2c28ace06a
--- a/Qdembedding.py
+++ b/Qdembedding.py
@@ -0,0 +1,61 @@
 
				+"""Embed level-4 bill items from JD_QingDanXM_parent.csv and save them as .npy files.
				+
				+For every CSV row whose 'fbcch' column equals 4, the script requests a
				+1024-dimensional embedding of the row's 'parent' and 'xmmc' text from the
				+DashScope OpenAI-compatible endpoint, then writes:
				+
				+* qd_content.npy   -- one row per item: [qdbh, xmmc, parent]
				+* qd_embedding.npy -- one 1024-float vector per item, in the same order
				+
				+The API key is read from the DASHSCOPE_API_KEY environment variable.
				+"""
				+import pymongo
				+import pandas
				+import json
				+from pymongo import MongoClient
				+import os
				+import re
				+import time
				+from openai import OpenAI
				+import numpy as np
				+
				+# NOTE(review): the MongoDB handles and the `service` import below are not
				+# used anywhere in this script; they are kept only so that module-level
				+# behaviour stays as it was. Confirm and remove if nothing relies on them.
				+client = MongoClient()
				+db = client["dinge"]
				+collection = db["de-collection"]
				+from subdir import service
				+
				+# Width of each embedding vector requested from the service.
				+EMBEDDING_DIM = 1024
				+
				+# Never commit credentials: take the key from the environment and fail
				+# early with a clear message when it is missing.
				+api_key = os.getenv("DASHSCOPE_API_KEY")
				+if not api_key:
				+    raise RuntimeError("DASHSCOPE_API_KEY environment variable is not set")
				+
				+client_ = OpenAI(
				+    api_key=api_key,
				+    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",  # base_url of the Bailian (DashScope) service
				+)
				+
				+qd = pandas.read_csv("JD_QingDanXM_parent.csv")
				+# Boolean mask instead of a per-row `.item()` test: works for any column dtype.
				+selected = qd[qd['fbcch'] == 4]
				+
				+# Plain lists grow in O(1); the arrays are built once after the loop instead
				+# of being re-copied by np.append / np.vstack on every iteration.
				+records = []
				+vectors = []
				+for count, (_, row) in enumerate(selected.iterrows(), start=1):
				+    completion = client_.embeddings.create(
				+        model="text-embedding-v4",
				+        input='类别： ' + row['parent'] + ", 内容：" + row["xmmc"],
				+        dimensions=EMBEDDING_DIM,  # vector size (only text-embedding-v3 and text-embedding-v4 accept this parameter)
				+        encoding_format="float",
				+    )
				+    records.append([row['qdbh'], row['xmmc'], row['parent']])
				+    vectors.append(completion.data[0].embedding)
				+    print(count)  # progress indicator
				+    time.sleep(0.5)  # pause between requests, as the original script did
				+
				+# reshape() keeps the (n, 3) / (n, EMBEDDING_DIM) layout even when no row matched.
				+con = np.array(records, dtype=str).reshape(-1, 3)
				+np.save('qd_content.npy', con)
				+np.save('qd_embedding.npy', np.array(vectors, dtype=float).reshape(-1, EMBEDDING_DIM))
			
 
				+
			
 
				+    
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/dinge_content_az_10.npy
+++ b/dinge_content_az_10.npy
--- a/dinge_content_az_11.npy
+++ b/dinge_content_az_11.npy
--- a/dinge_content_az_12.npy
+++ b/dinge_content_az_12.npy
--- a/dinge_content_az_8.npy
+++ b/dinge_content_az_8.npy
--- a/dinge_content_az_9.npy
+++ b/dinge_content_az_9.npy
--- a/dinge_embedding_az_10.npy
+++ b/dinge_embedding_az_10.npy
--- a/dinge_embedding_az_11.npy
+++ b/dinge_embedding_az_11.npy
--- a/dinge_embedding_az_12.npy
+++ b/dinge_embedding_az_12.npy
--- a/dinge_embedding_az_8.npy
+++ b/dinge_embedding_az_8.npy
--- a/dinge_embedding_az_9.npy
+++ b/dinge_embedding_az_9.npy
--- a/embedding.py
+++ b/embedding.py
@@ -44,7 +44,7 @@ def handle_jx(jx):
 
				 
			
 
				 
			
 
				 client_ = OpenAI(
			
 
				-    api_key='sk-a4f957065f5c4263bb1f39abe76bdce8',  # 如果您没有配置环境变量，请在此处用您的API Key进行替换
			
 
				+    api_key='sk-7c7be9c8dda84cb98901c98e0c74a2d8',  # 如果您没有配置环境变量，请在此处用您的API Key进行替换
			
 
				     base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"  # 百炼服务的base_url
			
 
				 )
			
 
				 
			
@@ -54,7 +54,7 @@ array3 = np.array([])
 
				 array4 = np.zeros((1,1024))
			
 
				 
			
 
				 count = 0
			
 
				-regex_pattern1 = re.compile("^7-", re.IGNORECASE)
			
 
				+regex_pattern1 = re.compile("^(?!1-|2-|3-|4-|5-|6-|7-|8-|9-|10-|11-).*", re.IGNORECASE)
			
 
				 
			
 
				 for post in collection.find({"zhuanye": "安装", "DEBH": {"$regex" : regex_pattern1}}):
			
 
				    
			
@@ -77,8 +77,8 @@ for post in collection.find({"zhuanye": "安装", "DEBH": {"$regex" : regex_patt
 
				 
			
 
				 con = np.stack((array1, array2, array3))
			
 
				 con = np.transpose(con)
			
 
				-np.save('dinge_content_az_7.npy', con)
			
 
				-np.save('dinge_embedding_az_7.npy', array4[1:])
			
 
				+np.save('dinge_content_az_12.npy', con)
			
 
				+np.save('dinge_embedding_az_12.npy', array4[1:])
			
 
				 
			
 
				     
			
 
				 
			
--- a/main.py
+++ b/main.py
@@ -6,6 +6,7 @@ import uuid
 
				 import re
			
 
				 import zipfile
			
 
				 import json
			
 
				+import chromadb
			
 
				 from fastapi.middleware.cors import CORSMiddleware
			
 
				 from fastapi.middleware.gzip import GZipMiddleware
			
 
				 from sentence_transformers  import SentenceTransformer
			
@@ -20,6 +21,7 @@ import numpy as np
 
				 from fastapi.staticfiles import StaticFiles
			
 
				 from pymongo import AsyncMongoClient
			
 
				 client = AsyncMongoClient()
			
 
				+chroma_client = chromadb.HttpClient(host='localhost', port=8000)
			
 
				 from fastapi.responses import FileResponse
			
 
				 from fastapi_cache import FastAPICache
			
 
				 from inmemory import InMemoryBackend
			
@@ -924,6 +926,12 @@ async def zujia(r: Info):
 
				     ##print(data)
			
 
				     return await db.zujia(client, r.name)
			
 
				 
			
 
				+@app.post("/tuijian/")
				+async def tuijian(r: Info):
				+    """Handle POST /tuijian/: return recommendations for the name in the request.
				+
				+    Passes the module-level Chroma client and ``r.name`` to ``db.tuijian``
				+    and returns whatever that coroutine produces.
				+    """
				+    recommendations = await db.tuijian(chroma_client, r.name)
				+    return recommendations
			
 
				+
			
 
				 
			
 
				 @app.post("/frequency/")
			
 
				 async def frequency(r: InfoWithID):
			
--- a/preprocess/saveEmbedding.py
+++ b/preprocess/saveEmbedding.py
@@ -5,10 +5,11 @@ import copy
 
				 import chromadb
			
 
				 import numpy as np 
			
 
				 client = chromadb.HttpClient(host='localhost', port=8000)
			
 
				-collection = client.get_or_create_collection(name="de-xstj")
			
 
				-a = np.load("dinge_embedding_xstj.npy")
			
 
				-content = np.load("dinge_content_xstj.npy")
			
 
				+collection = client.get_or_create_collection(name="qingdan")
			
 
				+a = np.load("qd_embedding.npy")
			
 
				+content = np.load("qd_content.npy")
			
 
				 for i in range(len(a)):
			
 
				+    print(i)
			
 
				     collection.add(
			
 
				     ids=[content[i][0]],
			
 
				     embeddings=[a[i]],
			
--- a/qd_content.npy
+++ b/qd_content.npy
--- a/qd_embedding.npy
+++ b/qd_embedding.npy
--- a/subdir/db.py
+++ b/subdir/db.py
@@ -2443,6 +2443,67 @@ async def tiaojia(client, biao_id, bh, bm, mingcheng, danwei, jiage, glf, lr, bz
 
				 
			
 
				 #################清单AI#############################
			
 
				 
			
 
				+async def tuijian(client, query, n_results=10):
				+    """Recommend quota items whose embeddings are nearest to a stored bill item.
				+
				+    ``query`` is looked up as an id in the "qingdan" collection. When it is
				+    found, the first run of nine digits inside ``query`` is taken as the bill
				+    code, and its first two digits select which quota collection is searched
				+    with the stored embedding.
				+
				+    Args:
				+        client: Chroma client exposing ``get_or_create_collection``.
				+        query: Id of the bill item in the "qingdan" collection.
				+        n_results: Number of neighbours requested from the quota collection
				+            (defaults to 10).
				+
				+    Returns:
				+        A list of dicts with keys '组价定额', '定额名称' and 'id', one per hit.
				+        The list is empty when the id is unknown, has no stored embedding,
				+        contains no nine-digit code, or its prefix has no quota collection.
				+    """
				+    # Two-digit bill-code prefix -> quota collection to search.
				+    collection_by_prefix = {
				+        '01': "de-tj",
				+        '03': "de-az",
				+        '04': "de-sz",
				+        '05': "de-yl",
				+    }
				+
				+    # NOTE(review): these client calls are made directly inside a coroutine;
				+    # if the client is a blocking HTTP client they hold up the event loop
				+    # while they run -- confirm and offload to a thread if needed.
				+    qingdan = client.get_or_create_collection(name="qingdan")
				+    result = qingdan.get(ids=[query], include=["documents", "metadatas", "embeddings"])
				+    if not result:
				+        return []
				+    embeddings = result['embeddings']
				+    # `is None` guard: len(None) would raise instead of reporting "no match".
				+    if embeddings is None or len(embeddings) == 0:
				+        return []
				+
				+    codes = re.findall(r'\d{9}', query)
				+    if not codes:
				+        # The id carries no nine-digit code, so no collection can be chosen.
				+        return []
				+
				+    target = collection_by_prefix.get(codes[0][0:2])
				+    if target is None:
				+        return []
				+
				+    hit = client.get_or_create_collection(name=target).query(
				+        query_embeddings=embeddings,
				+        n_results=n_results,
				+    )
				+    if len(hit['ids']) == 0:
				+        return []
				+
				+    # Results are nested per query embedding; only the first list is reported.
				+    return [
				+        {'组价定额': de_id, '定额名称': de_name, "id": de_id}
				+        for de_id, de_name in zip(hit['ids'][0], hit['documents'][0])
				+    ]
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				 async def zujia(client, query):
			
 
				     db = client["baojia"]
			
 
				     collection = db["qdxm"]
			
--- a/subdir/service.py
+++ b/subdir/service.py
@@ -758,7 +758,7 @@ def processQds(dataframe, from_, end, level):
 
				         item = dataframe.iloc[i]
			
 
				         fbcch = str(item['fbcch'])
			
 
				         if fbcch == str(level):
			
 
				-            entry = {"id": str(item["ID"]),"key": str(item["ID"]), "value": str(item["ID"]), "label": str(item['qdbh']) + " " + str(item["xmmc"]), "title": str(item['qdbh'])}
			
 
				+            entry = {"id": str(item["ID"]),"key": str(item["ID"]), "value": str(item["ID"]), "name": str(item['qdbh']) + " " + str(item["xmmc"]), "title": str(item['qdbh'])}
			
 
				             next = i + 1
			
 
				             while next < end and str(dataframe.iloc[next]["fbcch"]) != str(level):
			
 
				                 next = next + 1