# Qdembedding.py (1.5 KB)

  1. import pymongo
  2. import pandas
  3. import json
  4. from pymongo import MongoClient
  5. import os
  6. import re
  7. import time
  8. from openai import OpenAI
  9. import numpy as np
  10. client = MongoClient()
  11. db = client["dinge"]
  12. collection = db["de-collection"]
  13. from subdir import service
  14. ##print(collection.find_one({"DEBH": "3-94"}))
  15. client_ = OpenAI(
  16. api_key='sk-7c7be9c8dda84cb98901c98e0c74a2d8', # 如果您没有配置环境变量,请在此处用您的API Key进行替换
  17. base_url="https://dashscope.aliyuncs.com/compatible-mode/v1" # 百炼服务的base_url
  18. )
  19. array1 = np.array([])
  20. array2 = np.array([])
  21. array3 = np.array([])
  22. array4 = np.zeros((1,1024))
  23. count = 0
  24. qd = pandas.read_csv("JD_QingDanXM_parent.csv")
  25. for i in range(len(qd)):
  26. row = qd.iloc[i]
  27. if row['fbcch'].item() == 4:
  28. array1 = np.append(array1, row['qdbh'])
  29. array2 = np.append(array2, row['xmmc'])
  30. array3 = np.append(array3, row['parent'])
  31. completion = client_.embeddings.create(
  32. model="text-embedding-v4",
  33. input='类别: ' + row['parent'] + ", 内容:" + row["xmmc"],
  34. dimensions=1024, # 指定向量维度(仅 text-embedding-v3及 text-embedding-v4支持该参数)
  35. encoding_format="float"
  36. )
  37. ##print(completion.data[0].embedding)
  38. array4 = np.vstack((array4, [completion.data[0].embedding]))
  39. count = count + 1
  40. print(count)
  41. time.sleep(0.5)
  42. con = np.stack((array1, array2, array3))
  43. con = np.transpose(con)
  44. np.save('qd_content.npy', con)
  45. np.save('qd_embedding.npy', array4[1:])