"""preprocess.py: index the ``name_label`` JSON mapping into a Chroma collection.

The script reads a JSON object from the file ``name_label``, embeds every
value with a FlagEmbedding ``FlagModel`` and stores the result in the
``tj_de_bge`` collection of a remote Chroma server. Each JSON key is used as
the record id and each JSON value as the stored document.
"""

import json

import chromadb
from FlagEmbedding import FlagModel

# Connection and input settings (values unchanged from the original script).
CHROMA_HOST = "47.101.198.30"
CHROMA_PORT = 8000
COLLECTION_NAME = "tj_de_bge"
MODEL_PATH = "/Users/zxp/Downloads/test2_encoder_only_base_bge-large-zh-v1.5"
NAME_LABEL_PATH = "name_label"

# Number of entries embedded and uploaded per request. Batching avoids one
# model call and one HTTP round trip per entry while keeping each request
# bounded in size.
BATCH_SIZE = 64

client = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)
collection = client.get_or_create_collection(name=COLLECTION_NAME)
model = FlagModel(MODEL_PATH)

# Decode explicitly as UTF-8: the default text encoding is platform dependent
# and would corrupt (or fail on) non-ASCII content on some systems.
with open(NAME_LABEL_PATH, "r", encoding="utf-8") as f:
    obj = json.load(f)

# Materialise the (id, sentence) pairs once so they can be sliced into batches.
entries = list(obj.items())

for start in range(0, len(entries), BATCH_SIZE):
    batch = entries[start:start + BATCH_SIZE]
    ids = [entry for entry, _ in batch]
    sentences = [sentence for _, sentence in batch]

    # Progress output: one line per id, as in the original script.
    for entry in ids:
        print(entry)

    embeddings = model.encode(sentences)

    # NOTE(review): ``add`` is kept from the original; if re-running the
    # script is expected to overwrite existing ids, confirm whether
    # ``upsert`` is the intended call.
    collection.add(
        ids=ids,
        embeddings=embeddings,
        documents=sentences,
    )