# processDe.py
import copy
import os

import pandas as pd
  4. path = "/Users/xiaopengzhang/Downloads/qingdan/de"
  5. dirs = os.listdir("/Users/xiaopengzhang/Downloads/qingdan/de")
  6. def processDes(dataframe, from_, end, level):
  7. result = []
  8. for i in range(from_, end):
  9. item = dataframe.iloc[i]
  10. fbcch = str(item['fbcch'])
  11. if fbcch == str(level):
  12. entry = {"id": str(item["ID"]), "label": str(item["zjbt"]), "zjh": str(item["zjh"])}
  13. next = i + 1
  14. while next < end and str(dataframe.iloc[next]["fbcch"]) != str(level):
  15. next = next + 1
  16. entry["children"] = processDes(dataframe, i + 1, next, level + 1)
  17. result.append(entry)
  18. return result
  19. for dir in dirs:
  20. print(dir)
  21. if dir == "DeData_Fl.xml":
  22. pass
  23. else:
  24. files = os.listdir(os.path.join(path, dir))
  25. def traverse(node, name, collection):
  26. name.append(node["label"])
  27. child = node["children"]
  28. if len(child) == 0:
  29. name.append(node["zjh"])
  30. collection.append(copy.deepcopy(name))
  31. name.pop()
  32. else:
  33. for entry in child:
  34. traverse(entry, name, collection)
  35. name.pop()
  36. for file in files:
  37. if "DingEShu" in file:
  38. print(file)
  39. df = pd.read_csv(os.path.join(path, dir, file))
  40. df2 = pd.read_csv(os.path.join(path, dir, "JD_DanWeiGJ.csv"))
  41. newdf = []
  42. length = len(df)
  43. tree = processDes(df, 1, length, 2)
  44. collection = []
  45. for entry in tree:
  46. name = []
  47. traverse(entry, name, collection)
  48. #print(collection)
  49. for entry in collection:
  50. bh = entry[-1]
  51. filtered = df2[df2["YSDELBH"] == bh]
  52. if len(filtered) == 0:
  53. filtered = df2[df2["YSDELBH"] == int(bh)]
  54. ##print(filtered)
  55. filtered["parent"] = " ".join(entry[:-1])
  56. #print(filtered)
  57. newdf.append(filtered)
  58. processed = pd.concat(newdf)
  59. #print(processed)
  60. processed.to_csv(os.path.join(path, dir, "DingE_Processed" + dir + ".csv"))
  61. ##break