"""Embed MongoDB documents and store the vectors as ``.npy`` files.

Reads every document in ``dinge`` / ``de-collection`` whose ``zhuanye`` is
"安装" and whose ``DEBH`` does not start with ``1-`` .. ``11-``, requests one
embedding per document from the DashScope OpenAI-compatible endpoint, and
writes two files into the current working directory:

* ``dinge_content_az_12.npy``   - one row per document: (DEBH, GCLMC, parent)
* ``dinge_embedding_az_12.npy`` - one row per document: the embedding vector

The API key is read from the ``DASHSCOPE_API_KEY`` environment variable.
"""
import json
import os
import re
import time

import numpy as np
import pandas
import pymongo
from openai import OpenAI
from pymongo import MongoClient

from subdir import service

EMBEDDING_MODEL = "text-embedding-v4"
# Vector dimension requested from the API; also the column count of the
# saved embedding matrix.
EMBEDDING_DIM = 1024
# Pause between consecutive embedding requests, in seconds.
REQUEST_PAUSE_SECONDS = 0.5

client = MongoClient()
db = client["dinge"]
collection = db["de-collection"]


def handle_cl(cl):
    """Return a new dict holding only the CLBH, CLMC, JLDW, YSJG, SL and HJ
    entries of ``cl``.

    Raises:
        KeyError: if ``cl`` lacks any of those keys.
    """
    return {key: cl[key] for key in ("CLBH", "CLMC", "JLDW", "YSJG", "SL", "HJ")}


def handle_rg(rg):
    """Return a new dict holding only the CLBH, CLMC, JLDW, YSJG, gr and gf
    entries of ``rg``.

    Raises:
        KeyError: if ``rg`` lacks any of those keys.
    """
    return {key: rg[key] for key in ("CLBH", "CLMC", "JLDW", "YSJG", "gr", "gf")}


def handle_jx(jx):
    """Return a new dict holding only the jxbh, jxmc, DW, tbdj, sl and hj
    entries of ``jx``.

    Raises:
        KeyError: if ``jx`` lacks any of those keys.
    """
    return {key: jx[key] for key in ("jxbh", "jxmc", "DW", "tbdj", "sl", "hj")}


# Credentials must come from the environment, never from source control.
# NOTE(review): a key was previously committed in this file; it should be
# treated as leaked and rotated.
_api_key = os.getenv("DASHSCOPE_API_KEY")
if not _api_key:
    raise RuntimeError(
        "DASHSCOPE_API_KEY is not set; export it before running this script."
    )

client_ = OpenAI(
    api_key=_api_key,
    # Base URL of the Bailian (DashScope) OpenAI-compatible service.
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

# Negative lookahead: keeps DEBH values that do NOT start with "1-" .. "11-".
regex_pattern1 = re.compile("^(?!1-|2-|3-|4-|5-|6-|7-|8-|9-|10-|11-).*", re.IGNORECASE)

# Rows are collected in plain lists and converted to arrays once at the end.
# Growing NumPy arrays with np.append / np.vstack inside the loop copies the
# whole array on every iteration, and appending strings to an empty float64
# array relies on float-to-string dtype promotion.
debh_values = []
gclmc_values = []
parent_values = []
embeddings = []

query = {"zhuanye": "安装", "DEBH": {"$regex": regex_pattern1}}
for count, post in enumerate(collection.find(query), start=1):
    debh_values.append(post["DEBH"])
    gclmc_values.append(post["GCLMC"])
    parent_values.append(post["parent"])

    completion = client_.embeddings.create(
        model=EMBEDDING_MODEL,
        input='类别: ' + post['parent'] + ", 内容:" + post["GCLMC"],
        # Vector dimension (only text-embedding-v3 and text-embedding-v4
        # support this parameter).
        dimensions=EMBEDDING_DIM,
        encoding_format="float",
    )
    embeddings.append(completion.data[0].embedding)

    print(count)  # progress indicator: number of documents embedded so far
    time.sleep(REQUEST_PAUSE_SECONDS)

# Shape (n, 3): one (DEBH, GCLMC, parent) row per document.
con = np.transpose(np.stack((debh_values, gclmc_values, parent_values)))
np.save('dinge_content_az_12.npy', con)

# Shape (n, EMBEDDING_DIM). The leading zero-row array fixes the column count
# so an empty result still saves a (0, EMBEDDING_DIM) matrix, and a vector of
# the wrong length raises instead of being silently reshaped.
embedding_matrix = np.vstack([np.zeros((0, EMBEDDING_DIM))] + embeddings)
np.save('dinge_embedding_az_12.npy', embedding_matrix)