Browse Source

before_login

Xiaopeng Zhang 5 months ago
parent
commit
2c28ace06a

+ 61 - 0
Qdembedding.py

@@ -0,0 +1,61 @@
+import pymongo
+import pandas
+import json
+from pymongo import MongoClient
+import os
+import re
+import time
+from openai import OpenAI
+import numpy as np
+# MongoDB handle kept from the original script; nothing below reads from it.
+client = MongoClient()
+db = client["dinge"]
+collection = db["de-collection"]
+from subdir import service
+
+# Requested embedding width; also used for the shape of an empty result.
+EMBEDDING_DIM = 1024
+
+# Read the DashScope key from the environment so that no credential is
+# committed to the repository. Fail early with a clear message if it is unset.
+api_key = os.getenv("DASHSCOPE_API_KEY")
+if not api_key:
+    raise RuntimeError("DASHSCOPE_API_KEY environment variable is not set")
+
+client_ = OpenAI(
+    api_key=api_key,
+    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"  # Bailian (DashScope) service base_url
+)
+
+# The three content columns are still accumulated with np.append so that the
+# dtype of the saved array is exactly what the original script produced.
+array1 = np.array([])
+array2 = np.array([])
+array3 = np.array([])
+# Embeddings are collected in a list and converted once at the end, instead of
+# re-copying the whole matrix with np.vstack on every iteration.
+embeddings = []
+
+count = 0
+qd = pandas.read_csv("JD_QingDanXM_parent.csv")
+for i in range(len(qd)):
+    row = qd.iloc[i]
+    # Only rows whose 'fbcch' column equals 4 are embedded.
+    if row['fbcch'].item() == 4:
+        array1 = np.append(array1, row['qdbh'])
+        array2 = np.append(array2, row['xmmc'])
+        array3 = np.append(array3, row['parent'])
+
+        completion = client_.embeddings.create(
+            model="text-embedding-v4",
+            input='类别: ' + row['parent'] + ", 内容:" + row["xmmc"],
+            dimensions=EMBEDDING_DIM,  # vector size (only supported by text-embedding-v3 and text-embedding-v4)
+            encoding_format="float"
+        )
+        embeddings.append(completion.data[0].embedding)
+        count = count + 1
+        print(count)
+        # Pause between requests (presumably to stay under the API rate limit).
+        time.sleep(0.5)
+
+# One row per embedded item: (qdbh, xmmc, parent).
+con = np.stack((array1, array2, array3))
+con = np.transpose(con)
+np.save('qd_content.npy', con)
+if embeddings:
+    embedding_matrix = np.asarray(embeddings, dtype=np.float64)
+else:
+    # Keep the (0, 1024) shape the original produced when no row matched.
+    embedding_matrix = np.zeros((0, EMBEDDING_DIM))
+np.save('qd_embedding.npy', embedding_matrix)
+
+    
+
+
+
+

BIN
dinge_content_az_10.npy


BIN
dinge_content_az_11.npy


BIN
dinge_content_az_12.npy


BIN
dinge_content_az_8.npy


BIN
dinge_content_az_9.npy


BIN
dinge_embedding_az_10.npy


BIN
dinge_embedding_az_11.npy


BIN
dinge_embedding_az_12.npy


BIN
dinge_embedding_az_8.npy


BIN
dinge_embedding_az_9.npy


+ 4 - 4
embedding.py

@@ -44,7 +44,7 @@ def handle_jx(jx):
 
 
 client_ = OpenAI(
-    api_key='sk-a4f957065f5c4263bb1f39abe76bdce8',  # 如果您没有配置环境变量,请在此处用您的API Key进行替换
+    api_key='sk-7c7be9c8dda84cb98901c98e0c74a2d8',  # 如果您没有配置环境变量,请在此处用您的API Key进行替换
     base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"  # 百炼服务的base_url
 )
 
@@ -54,7 +54,7 @@ array3 = np.array([])
 array4 = np.zeros((1,1024))
 
 count = 0
-regex_pattern1 = re.compile("^7-", re.IGNORECASE)
+regex_pattern1 = re.compile("^(?!1-|2-|3-|4-|5-|6-|7-|8-|9-|10-|11-).*", re.IGNORECASE)
 
 for post in collection.find({"zhuanye": "安装", "DEBH": {"$regex" : regex_pattern1}}):
    
@@ -77,8 +77,8 @@ for post in collection.find({"zhuanye": "安装", "DEBH": {"$regex" : regex_patt
 
 con = np.stack((array1, array2, array3))
 con = np.transpose(con)
-np.save('dinge_content_az_7.npy', con)
-np.save('dinge_embedding_az_7.npy', array4[1:])
+np.save('dinge_content_az_12.npy', con)
+np.save('dinge_embedding_az_12.npy', array4[1:])
 
     
 

+ 8 - 0
main.py

@@ -6,6 +6,7 @@ import uuid
 import re
 import zipfile
 import json
+import chromadb
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.middleware.gzip import GZipMiddleware
 from sentence_transformers  import SentenceTransformer
@@ -20,6 +21,7 @@ import numpy as np
 from fastapi.staticfiles import StaticFiles
 from pymongo import AsyncMongoClient
 client = AsyncMongoClient()
+chroma_client = chromadb.HttpClient(host='localhost', port=8000)
 from fastapi.responses import FileResponse
 from fastapi_cache import FastAPICache
 from inmemory import InMemoryBackend
@@ -924,6 +926,12 @@ async def zujia(r: Info):
     ##print(data)
     return await db.zujia(client, r.name)
 
+@app.post("/tuijian/")
+async def tuijian(r: Info):
+    """Handle POST /tuijian/ by passing ``r.name`` and the shared Chroma client to ``db.tuijian``."""
+    recommendations = await db.tuijian(chroma_client, r.name)
+    return recommendations
+
 
 @app.post("/frequency/")
 async def frequency(r: InfoWithID):

+ 4 - 3
preprocess/saveEmbedding.py

@@ -5,10 +5,11 @@ import copy
 import chromadb
 import numpy as np 
 client = chromadb.HttpClient(host='localhost', port=8000)
-collection = client.get_or_create_collection(name="de-xstj")
-a = np.load("dinge_embedding_xstj.npy")
-content = np.load("dinge_content_xstj.npy")
+collection = client.get_or_create_collection(name="qingdan")
+a = np.load("qd_embedding.npy")
+content = np.load("qd_content.npy")
 for i in range(len(a)):
+    print(i)
     collection.add(
     ids=[content[i][0]],
     embeddings=[a[i]],

BIN
qd_content.npy


BIN
qd_embedding.npy


+ 61 - 0
subdir/db.py

@@ -2443,6 +2443,67 @@ async def tiaojia(client, biao_id, bh, bm, mingcheng, danwei, jiage, glf, lr, bz
 
 #################清单AI#############################
 
+async def tuijian(client, query):##default return 10 neighbors
+    """Return nearest-neighbour entries for the item stored under id ``query``.
+
+    The embedding saved under id ``query`` in the "qingdan" collection is used
+    as the query vector against a second collection, chosen by the first two
+    digits of the first 9-digit run found in ``query``.
+
+    Args:
+        client: Chroma client exposing ``get_or_create_collection``.
+        query: Id of the item in the "qingdan" collection; expected to contain
+            a 9-digit code.
+
+    Returns:
+        A list of dicts with keys '组价定额', '定额名称' and "id", one per hit.
+        The list is empty when ``query`` has no stored embedding, contains no
+        9-digit code, has an unmapped code prefix, or the search returns nothing.
+        The number of hits is left at the collection's default ``n_results``.
+    """
+    # Two-digit code prefix -> name of the collection to search.
+    targets = {'01': "de-tj", '03': "de-az", '04': "de-sz", '05': "de-yl"}
+
+    collection = client.get_or_create_collection(name="qingdan")
+    result = collection.get(ids=[query], include=["documents", "metadatas", "embeddings"])
+    if not result or len(result['embeddings']) == 0:
+        return []
+
+    # Previously `re.findall(...)[0]` raised IndexError when the query held no
+    # 9-digit run; treat that case as "no recommendation" instead.
+    match = re.search(r'\d{9}', query)
+    if match is None:
+        return []
+
+    target = targets.get(match.group(0)[0:2])
+    if target is None:
+        return []
+
+    collection = client.get_or_create_collection(name=target)
+    hit = collection.query(
+        query_embeddings=result['embeddings']
+    )
+    if len(hit['ids']) == 0:
+        return []
+
+    # Only one query vector is sent, so the hits sit in the first result row.
+    return [
+        {'组价定额': hit_id, '定额名称': document, "id": hit_id}
+        for hit_id, document in zip(hit['ids'][0], hit['documents'][0])
+    ]
+
+
+
+
 async def zujia(client, query):
     db = client["baojia"]
     collection = db["qdxm"]

+ 1 - 1
subdir/service.py

@@ -758,7 +758,7 @@ def processQds(dataframe, from_, end, level):
         item = dataframe.iloc[i]
         fbcch = str(item['fbcch'])
         if fbcch == str(level):
-            entry = {"id": str(item["ID"]),"key": str(item["ID"]), "value": str(item["ID"]), "label": str(item['qdbh']) + " " + str(item["xmmc"]), "title": str(item['qdbh'])}
+            entry = {"id": str(item["ID"]),"key": str(item["ID"]), "value": str(item["ID"]), "name": str(item['qdbh']) + " " + str(item["xmmc"]), "title": str(item['qdbh'])}
             next = i + 1
             while next < end and str(dataframe.iloc[next]["fbcch"]) != str(level):
                 next = next + 1