| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
- import pymongo
- import pandas
- import json
- from pymongo import MongoClient
- import os
- import re
- import time
- from openai import OpenAI
- import numpy as np
# Open a MongoClient with default connection settings and pick the
# "de-collection" collection of the "dinge" database; the rest of the
# script queries it through the module-level name ``collection``.
client = MongoClient()
db = client.get_database("dinge")
collection = db.get_collection("de-collection")
- from subdir import service
- ##print(collection.find_one({"DEBH": "3-94"}))
def handle_cl(cl):
    """Return a new dict holding only the selected fields of ``cl``.

    The fields copied, in this order, are ``CLBH``, ``CLMC``, ``JLDW``,
    ``YSJG``, ``SL`` and ``HJ``. Any other keys present in ``cl`` are
    dropped. A field missing from ``cl`` makes the lookup fail
    (``KeyError`` for a plain dict).
    """
    wanted = ("CLBH", "CLMC", "JLDW", "YSJG", "SL", "HJ")
    return {field: cl[field] for field in wanted}
def handle_rg(rg):
    """Return a new dict holding only the selected fields of ``rg``.

    The fields copied, in this order, are ``CLBH``, ``CLMC``, ``JLDW``,
    ``YSJG``, ``gr`` and ``gf``. Any other keys present in ``rg`` are
    dropped. A field missing from ``rg`` makes the lookup fail
    (``KeyError`` for a plain dict).
    """
    wanted = ("CLBH", "CLMC", "JLDW", "YSJG", "gr", "gf")
    return {field: rg[field] for field in wanted}
def handle_jx(jx):
    """Return a new dict holding only the selected fields of ``jx``.

    The fields copied, in this order, are ``jxbh``, ``jxmc``, ``DW``,
    ``tbdj``, ``sl`` and ``hj``. Any other keys present in ``jx`` are
    dropped. A field missing from ``jx`` makes the lookup fail
    (``KeyError`` for a plain dict).
    """
    wanted = ("jxbh", "jxmc", "DW", "tbdj", "sl", "hj")
    return {field: jx[field] for field in wanted}
# OpenAI-compatible client pointed at the DashScope (Bailian) endpoint.
#
# The API key is read from the DASHSCOPE_API_KEY environment variable when it
# is set, so the secret does not have to live in the source file.
# NOTE(review): the literal fallback below is kept only so that existing
# setups without the environment variable keep working. A key committed to
# source should be treated as leaked: rotate it and delete the fallback.
client_ = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY") or 'sk-7c7be9c8dda84cb98901c98e0c74a2d8',
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",  # base_url of the Bailian service
)
# Embed every matching document and save the results as two .npy files:
#   dinge_content_az_12.npy   - one row per document: (DEBH, GCLMC, parent)
#   dinge_embedding_az_12.npy - one row per document: the embedding vector
#
# Rows are collected in plain lists and turned into arrays once, after the
# loop; growing NumPy arrays inside the loop would re-copy all previously
# collected data on every iteration.
EMBEDDING_DIM = 1024  # requested from the API and used as the matrix width

debh_codes = []
item_names = []
parent_names = []
embeddings = []
count = 0

# Negative lookahead: matches DEBH values that do NOT start with one of the
# prefixes "1-" through "11-".
regex_pattern1 = re.compile("^(?!1-|2-|3-|4-|5-|6-|7-|8-|9-|10-|11-).*", re.IGNORECASE)

for post in collection.find({"zhuanye": "安装", "DEBH": {"$regex": regex_pattern1}}):
    debh_codes.append(post['DEBH'])
    item_names.append(post['GCLMC'])
    parent_names.append(post['parent'])

    completion = client_.embeddings.create(
        model="text-embedding-v4",
        input='类别: ' + post['parent'] + ", 内容:" + post["GCLMC"],
        # Vector dimension; per the original note only text-embedding-v3 and
        # text-embedding-v4 support this parameter.
        dimensions=EMBEDDING_DIM,
        encoding_format="float",
    )
    embeddings.append(completion.data[0].embedding)

    count = count + 1
    print(count)  # progress indicator
    time.sleep(0.5)  # pause between requests

# Shape (n, 3): columns are DEBH, GCLMC, parent.
con = np.stack((np.array(debh_codes), np.array(item_names), np.array(parent_names)))
con = np.transpose(con)
np.save('dinge_content_az_12.npy', con)

# Shape (n, EMBEDDING_DIM), float64. The empty (0, EMBEDDING_DIM) seed keeps
# the result two-dimensional when no document matched, and makes vstack raise
# ValueError if any returned vector has a different length.
embedding_matrix = np.vstack([np.zeros((0, EMBEDDING_DIM)), *embeddings])
np.save('dinge_embedding_az_12.npy', embedding_matrix)
-
|