"""Attach hierarchy paths to rows of ``JD_DanWeiGJ.csv``.

For every sub-directory of ``path`` the script reads each CSV whose file name
contains ``"DingEShu"``, rebuilds the nesting encoded in its ``fbcch`` column,
and for every leaf of that hierarchy selects the rows of ``JD_DanWeiGJ.csv``
whose ``YSDELBH`` equals the leaf's ``zjh``.  The selected rows get a
``parent`` column holding the space-joined labels from the top-level node down
to the leaf, and the result is written to ``DingE_Processed<dir>.csv`` inside
the same sub-directory.
"""
import copy
import os

import pandas as pd

path = "/Users/xiaopengzhang/Downloads/qingdan/de"


def processDes(dataframe, from_, end, level):
    """Build a nested list of nodes from a flat, level-tagged table.

    Rows at positions ``from_`` (inclusive) to ``end`` (exclusive) are
    scanned.  Every row whose ``fbcch`` value, converted to ``str``, equals
    ``str(level)`` becomes a node; the rows between it and the next row of
    the same level (or ``end``) are processed recursively with ``level + 1``
    to produce its children.

    Args:
        dataframe: pandas DataFrame with the columns ``fbcch``, ``ID``,
            ``zjbt`` and ``zjh``.
        from_: first positional row index to inspect.
        end: positional row index at which to stop (exclusive).
        level: level value that marks nodes of this depth.

    Returns:
        A list of dicts with the keys ``"id"``, ``"label"``, ``"zjh"`` (all
        converted to ``str``) and ``"children"`` (a list of the same shape).
    """
    level_str = str(level)
    result = []
    for i in range(from_, end):
        item = dataframe.iloc[i]
        if str(item["fbcch"]) != level_str:
            continue
        # The subtree of this node ends at the next row of the same level.
        sibling = i + 1
        while sibling < end and str(dataframe.iloc[sibling]["fbcch"]) != level_str:
            sibling += 1
        result.append(
            {
                "id": str(item["ID"]),
                "label": str(item["zjbt"]),
                "zjh": str(item["zjh"]),
                "children": processDes(dataframe, i + 1, sibling, level + 1),
            }
        )
    return result


def traverse(node, name, collection):
    """Collect one ``[label, ..., label, zjh]`` list per leaf below ``node``.

    Args:
        node: dict as produced by ``processDes``.
        name: working stack of ancestor labels; it is extended while
            descending and restored to its incoming state before returning.
        collection: output list; a copy of the label stack followed by the
            leaf's ``zjh`` is appended for every node without children.
    """
    name.append(node["label"])
    children = node["children"]
    if not children:
        name.append(node["zjh"])
        collection.append(copy.deepcopy(name))
        name.pop()
    else:
        for child in children:
            traverse(child, name, collection)
    name.pop()


def _matching_rows(lookup, code):
    """Return the rows of ``lookup`` whose ``YSDELBH`` equals ``code``.

    ``code`` is a string.  If the string comparison matches nothing, the
    comparison is retried with ``int(code)``.  A code that is not an integer
    literal simply yields the empty selection instead of raising.
    """
    matches = lookup[lookup["YSDELBH"] == code]
    if not matches.empty:
        return matches
    try:
        numeric_code = int(code)
    except ValueError:
        # e.g. "nan" or "101.0": there is no integer form to retry with.
        return matches
    return lookup[lookup["YSDELBH"] == numeric_code]


def _process_file(tree_csv, lookup):
    """Return the tagged lookup rows for one ``DingEShu`` CSV, or ``None``.

    ``None`` is returned when the hierarchy has no leaves, because
    ``pd.concat`` raises on an empty list.
    """
    df = pd.read_csv(tree_csv)
    # Row 0 is skipped and the top-level nodes are those tagged with level 2.
    tree = processDes(df, 1, len(df), 2)
    collection = []
    for node in tree:
        traverse(node, [], collection)
    # ``assign`` returns a new frame, so the shared lookup table is never
    # written to through a filtered slice.
    frames = [
        _matching_rows(lookup, entry[-1]).assign(parent=" ".join(entry[:-1]))
        for entry in collection
    ]
    if not frames:
        return None
    return pd.concat(frames)


def main():
    """Process every sub-directory of ``path``."""
    for dir_name in os.listdir(path):
        print(dir_name)
        dir_path = os.path.join(path, dir_name)
        # Plain files in the root (DeData_Fl.xml, .DS_Store, ...) are skipped.
        if dir_name == "DeData_Fl.xml" or not os.path.isdir(dir_path):
            continue
        tree_files = [f for f in os.listdir(dir_path) if "DingEShu" in f]
        if not tree_files:
            continue
        # Read the lookup table once per directory rather than once per file.
        lookup = pd.read_csv(os.path.join(dir_path, "JD_DanWeiGJ.csv"))
        for file_name in tree_files:
            print(file_name)
            processed = _process_file(os.path.join(dir_path, file_name), lookup)
            if processed is None:
                print("no leaf entries found, skipping " + file_name)
                continue
            processed.to_csv(
                os.path.join(dir_path, "DingE_Processed" + dir_name + ".csv")
            )


if __name__ == "__main__":
    main()