| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- import pymongo
- import pandas
- import json
- from pymongo import MongoClient
- import os
- import re
- import time
- from openai import OpenAI
- import numpy as np
# MongoDB handles. MongoClient() is called without arguments, so the driver's
# default connection settings apply.
client = MongoClient()
db = client.get_database("dinge")
collection = db.get_collection("de-collection")
- from subdir import service
- ##print(collection.find_one({"DEBH": "3-94"}))
# OpenAI-compatible client pointed at the DashScope (Bailian) endpoint.
client_ = OpenAI(
    # The key is read from the environment rather than committed to the file.
    # NOTE(review): the key that used to be hard-coded on this line is exposed
    # in this file's history and should be revoked and reissued.
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",  # Bailian service base_url
)
# --- Embedding job configuration (values taken from the original script) ----
QD_CSV_PATH = "JD_QingDanXM_parent.csv"
QD_CONTENT_PATH = "qd_content.npy"
QD_EMBEDDING_PATH = "qd_embedding.npy"
EMBEDDING_MODEL = "text-embedding-v4"
EMBEDDING_DIM = 1024  # sent as `dimensions` and used as the matrix width
TARGET_FBCCH = 4  # only rows whose `fbcch` equals this value are embedded
REQUEST_PAUSE_SECONDS = 0.5  # pause after every embedding request


def embed_text(text):
    """Request one embedding for *text* through the module-level ``client_``.

    Returns the ``embedding`` of the first item in the response.
    """
    response = client_.embeddings.create(
        model=EMBEDDING_MODEL,
        input=text,
        # Per the original note, only text-embedding-v3 and
        # text-embedding-v4 accept the `dimensions` argument.
        dimensions=EMBEDDING_DIM,
        encoding_format="float",
    )
    return response.data[0].embedding


def build_qd_index(qd, embed=embed_text, pause=REQUEST_PAUSE_SECONDS):
    """Embed every row of *qd* whose ``fbcch`` equals ``TARGET_FBCCH``.

    Args:
        qd: DataFrame with the columns ``fbcch``, ``qdbh``, ``xmmc`` and
            ``parent``.
        embed: Callable mapping a prompt string to a sequence of
            ``EMBEDDING_DIM`` floats. Defaults to the live API call.
        pause: Seconds to sleep after each ``embed`` call; a falsy value
            disables the pause.

    Returns:
        A ``(content, vectors)`` tuple. ``content`` is a string array of shape
        ``(n, 3)`` holding ``qdbh``, ``xmmc`` and ``parent`` per selected row;
        ``vectors`` is a float array of shape ``(n, EMBEDDING_DIM)`` in the
        same row order.

    Raises:
        TypeError: If ``parent`` or ``xmmc`` is not a string for a selected
            row (the prompt is built by plain concatenation).
        ValueError: If ``embed`` returns a vector whose length is not
            ``EMBEDDING_DIM``.
    """
    selected = qd[qd["fbcch"] == TARGET_FBCCH]
    codes = list(selected["qdbh"])
    names = list(selected["xmmc"])
    parents = list(selected["parent"])

    # Preallocate instead of growing with np.vstack, which recopies the whole
    # matrix on every iteration.
    vectors = np.empty((len(names), EMBEDDING_DIM))

    # NOTE(review): SOURCE had lost its indentation; the progress print and the
    # pause are taken to belong to each embedded row - confirm.
    for done, (name, parent) in enumerate(zip(names, parents), start=1):
        vectors[done - 1] = embed('类别: ' + parent + ", 内容:" + name)
        print(done)
        if pause:
            time.sleep(pause)

    # Convert straight to text. The previous version seeded its accumulators
    # with empty float64 arrays, so a numeric column passed through float
    # before being stringified.
    content = np.array([codes, names, parents], dtype=str).T
    return content, vectors


def main():
    """Read the CSV, embed the selected rows and write both .npy files."""
    qd = pandas.read_csv(QD_CSV_PATH)
    content, vectors = build_qd_index(qd)
    np.save(QD_CONTENT_PATH, content)
    np.save(QD_EMBEDDING_PATH, vectors)


if __name__ == "__main__":
    main()
-
|