Xiaopeng Zhang 4 ماه پیش
والد
کامیت
1bd6e977c1
4 فایل تغییر یافته به همراه 180 افزوده شده و 172 حذف شده
  1. 3 1
      README
  2. 160 166
      saveDE.ipynb
  3. 6 3
      specialdetection.py
  4. 11 2
      subdir/db.py

+ 3 - 1
README

@@ -75,6 +75,8 @@ JD_PeiBiF 表很有用,它可以用来找一个材料的组成。
 db.collection.createIndex({'DEBH': 'text'})
 
 灰土2:8 好像也是拆分的?
-素水泥浆 好像也是拆分的?
+(901胶)素水泥浆 好像也是拆分的?
+
+
 
 

تفاوت فایلی نمایش داده نمی‌شود زیرا این فایل بسیار بزرگ است
+ 160 - 166
saveDE.ipynb


+ 6 - 3
specialdetection.py

@@ -25,14 +25,17 @@ def clean(debh):
         debh = debh[:position-1]
     return debh
 async def main():
-    for post in collection.find({"biao_id": '68a44e464966105da88382d4', 'Dwgcbh': '005'}):
+    
+    for post in collection.find({"biao_id": '689c49e092a763b02ec38603', 'Dwgcbh': '006'}):##total 006
         for entry in (post['__children']):
         ##print(entry['清单编码'])
             cleaned = clean(entry['清单编码'])
+            
             if cleaned.startswith("D"):
                 continue
             dercj = (entry['dercj'])
-            A1, A2, rg, jx, cl, A6, A7, zhuanye = await dbservice.getSingleDeXilie_(model, client2, 10, cleaned, entry['名称'])
+            A1, A2, rg, jx, cl, A6, A7, zhuanye = await dbservice.getSingleDeXilie_(model, client2, 10, cleaned, post['bt'] + " " + entry['名称'])
+            
             if A1 == None:
                 if  '-F' in cleaned  :
                     pass
@@ -64,7 +67,7 @@ async def main():
                 #and cleaned !='5-28' 
                 #and cleaned != '4-3' 
                     if (not hit) and item[1] != 'S00001' and item[1] != '410000F' and item[1] != 'JD0001' and (not '二类工' in item[2]) and (not '水泥' in item[2]) \
-                    and (not '砂浆' in item[2]) and item[1] != '99090513' and item[1] != '99090509':
+                    and (not '砂浆' in item[2]) and item[1] != '99090513' and item[1] != '99090509' and item[1] != '4F0000':
                         print(cleaned)
                         print("special case found for" + str(item))
                     

+ 11 - 2
subdir/db.py

@@ -1390,6 +1390,7 @@ async def getSingleDeXilie_(model, client, zhuanye, debh, mc):
     count = 0
     actual_zhuanye = []
     mcs = []
+    parents = []
     if "附注" in debh:
         position = debh.find("附注")
         debh = debh[:position]
@@ -1405,6 +1406,7 @@ async def getSingleDeXilie_(model, client, zhuanye, debh, mc):
         ##print(post)
         actual_zhuanye.append(post['zhuanye'])
         mcs.append(post['GCLMC'])
+        parents.append(post['parent'])
         count = count + 1
     if count == 0:
         return  None, None, None, None, None, None, None, zhuanye
@@ -1430,7 +1432,7 @@ async def getSingleDeXilie_(model, client, zhuanye, debh, mc):
     else:
         hit = False
         for i in range(0, count):
-            if mcs[i] == mc:
+            if mcs[i] in mc:
                 hit = True
                 actual_zhuanye[0] = actual_zhuanye[i]
         if hit:
@@ -1453,7 +1455,10 @@ async def getSingleDeXilie_(model, client, zhuanye, debh, mc):
                 A1, A2, A3, A4, A5, A6, A7 = service.getSingleDeXilie_xsaz(debh)
                 return A1, A2, A3, A4, A5, A6, A7, 60   
         else:
+            for i in range(len(parents)):
+                mcs[i] = parents[i] + ' ' + mcs[i]
             mcs.append(mc)
+            #print(mcs)
             embeddings = model.encode(mcs)
             similarities = model.similarity( embeddings[-1], embeddings)
             ##array([1.0000002, 0.7662151, 1.0000002], dtype=float32)
@@ -1490,6 +1495,7 @@ async def getSingleDeXilie(model, client, zhuanye, debh, mc):
     count = 0
     actual_zhuanye = []
     mcs = []
+    parents = []
     if "附注" in debh:
         position = debh.find("附注")
         debh = debh[:position]
@@ -1505,6 +1511,7 @@ async def getSingleDeXilie(model, client, zhuanye, debh, mc):
         ##print(post)
         actual_zhuanye.append(post['zhuanye'])
         mcs.append(post['GCLMC'])
+        parents.append(post['parent'])
         count = count + 1
     if count == 0:
         return  None, None, None, None, None, None, None, zhuanye
@@ -1530,7 +1537,7 @@ async def getSingleDeXilie(model, client, zhuanye, debh, mc):
     else:
         hit = False
         for i in range(0, count):
-            if mcs[i] == mc:
+            if mcs[i] in mc:
                 hit = True
                 actual_zhuanye[0] = actual_zhuanye[i]
         if hit:
@@ -1553,6 +1560,8 @@ async def getSingleDeXilie(model, client, zhuanye, debh, mc):
                 A1, A2, A3, A4, A5, A6, A7 = service.getSingleDeXilie_xsaz(debh)
                 return A1, A2, A3, A4, A5, A6, A7, 60   
         else:
+            for i in range(len(parents)):
+                mcs[i] = parents[i] + " " + mcs[i]
             mcs.append(mc)
             embeddings = model.encode(mcs)
             similarities = model.similarity( embeddings[-1], embeddings)

برخی فایل‌ها در این مقایسه diff نمایش داده نمی‌شوند زیرا تعداد فایل‌ها بسیار زیاد است