# processDe.py
  1. import pandas as pd
  2. import os
  3. import copy
  4. path = "/Users/xiaopengzhang/Downloads/qingdan/de"
  5. dirs = os.listdir("/Users/xiaopengzhang/Downloads/qingdan/de")
  6. def processDes(dataframe, from_, end, level):
  7. result = []
  8. for i in range(from_, end):
  9. item = dataframe.iloc[i]
  10. fbcch = str(item['fbcch'])
  11. if fbcch == str(level):
  12. entry = {"id": str(item["ID"]), "label": str(item["zjbt"]), "zjh": str(item["zjh"])}
  13. next = i + 1
  14. while next < end and str(dataframe.iloc[next]["fbcch"]) != str(level):
  15. next = next + 1
  16. entry["children"] = processDes(dataframe, i + 1, next, level + 1)
  17. result.append(entry)
  18. return result
  19. for dir in dirs:
  20. print(dir)
  21. files = os.listdir(os.path.join(path, dir))
  22. def traverse(node, name, collection):
  23. name.append(node["label"])
  24. child = node["children"]
  25. if len(child) == 0:
  26. name.append(node["zjh"])
  27. collection.append(copy.deepcopy(name))
  28. name.pop()
  29. else:
  30. for entry in child:
  31. traverse(entry, name, collection)
  32. name.pop()
  33. for file in files:
  34. if "DingEShu" in file:
  35. print(file)
  36. df = pd.read_csv(os.path.join(path, dir, file))
  37. df2 = pd.read_csv(os.path.join(path, dir, "JD_DanWeiGJ.csv"))
  38. newdf = []
  39. length = len(df)
  40. tree = processDes(df, 1, length, 2)
  41. collection = []
  42. for entry in tree:
  43. name = []
  44. traverse(entry, name, collection)
  45. #print(collection)
  46. for entry in collection:
  47. bh = entry[-1]
  48. filtered = df2[df2["YSDELBH"] == bh]
  49. if len(filtered) == 0:
  50. filtered = df2[df2["YSDELBH"] == int(bh)]
  51. ##print(filtered)
  52. filtered["parent"] = " ".join(entry[:-1])
  53. #print(filtered)
  54. newdf.append(filtered)
  55. processed = pd.concat(newdf)
  56. #print(processed)
  57. processed.to_csv(os.path.join(path, dir, "DingE_Processed" + dir + ".csv"))
  58. ##break