from pathlib import Path
import base64
import re

from lxml import etree as ET


class binaryToChartxml:
    """Extract embedded chart XML from the OLE binary payload of an XML document.

    The document is parsed with lxml. ``decoding_bindata`` locates the
    base64 text of the ``BINDATA`` element that an OLE ``BINITEM`` refers to,
    decodes it, and keeps only the byte range delimited by a start and an end
    marker. The result is stored on ``self.xml_data``.
    """

    # Text of the BINDATA element whose Id equals the BinData attribute of a
    # BINITEM with Format="OLE".
    _OLE_BINDATA_XPATH = '//BINDATA[@Id=//BINITEM[@Format="OLE"]/@BinData]/text()'

    # NOTE(review): in the reviewed source both marker literals were empty
    # byte strings and the assignment of ``end`` was missing, which looks like
    # markup lost during text extraction. The values below are an assumption
    # based on the class name (chart XML) -- confirm against real payloads, or
    # pass the correct markers to ``decoding_bindata``.
    XML_START_MARKER = b'<c:chartSpace'
    XML_END_MARKER = b'</c:chartSpace>'

    def __init__(self, xml_path):
        """Parse the document at ``xml_path`` and keep its tree and root.

        Args:
            xml_path: Path (or any source accepted by ``lxml.etree.parse``)
                of the XML document to read.
        """
        self.tree = ET.parse(xml_path)
        self.root = self.tree.getroot()
        # Filled in by decoding_bindata().
        self.xml_data = None

    def decoding_bindata(self, *, start_marker=None, end_marker=None):
        """Decode the OLE BINDATA payload and extract the embedded XML bytes.

        Args:
            start_marker: Byte sequence marking the beginning of the XML
                fragment. Defaults to ``XML_START_MARKER``.
            end_marker: Byte sequence marking the end of the XML fragment
                (included in the result). Defaults to ``XML_END_MARKER``.

        Returns:
            The extracted bytes, from the first ``start_marker`` through the
            first following ``end_marker`` inclusive. The same value is stored
            on ``self.xml_data``.

        Raises:
            IndexError: No matching BINDATA text exists in the document.
            binascii.Error: The payload is not decodable as base64.
            ValueError: A marker was not found in the decoded data.
        """
        if start_marker is None:
            start_marker = self.XML_START_MARKER
        if end_marker is None:
            end_marker = self.XML_END_MARKER

        payloads = self.root.xpath(self._OLE_BINDATA_XPATH)
        if not payloads:
            # Same exception type as indexing the empty result, but with a
            # message that says what is actually missing.
            raise IndexError(
                'no BINDATA element referenced by an OLE BINITEM was found'
            )

        encoded_data = payloads[0].encode('utf-8')
        # NOTE(review): the reviewed source also applied re.sub(b'', b'', ...)
        # and .replace(b'', b'') here. With empty patterns both are no-ops, so
        # they are omitted; if the real patterns stripped markup from the
        # payload, restore them at this point.
        encoded_data = encoded_data.replace(b'\r\n', b'')

        # Perform the base64 decoding. Appending '==' tolerates payloads whose
        # trailing padding was dropped; surplus padding is ignored by the
        # (non-validating) decoder.
        decoded_data = base64.b64decode(encoded_data + b'==')

        # Keep only the XML portion of the decoded data: the bytes between the
        # start marker and the end marker, inclusive.
        start = decoded_data.find(start_marker)
        if start == -1:
            raise ValueError(
                f'start marker {start_marker!r} not found in decoded data'
            )
        # Search from ``start`` so an earlier stray end marker cannot produce
        # an inverted (empty) slice.
        end = decoded_data.find(end_marker, start)
        if end == -1:
            raise ValueError(
                f'end marker {end_marker!r} not found in decoded data'
            )

        self.xml_data = decoded_data[start:end + len(end_marker)]
        return self.xml_data

    # TODO: add save_chart_xml(xml_output_path) that writes self.xml_data to
    # disk in binary mode; it was previously only sketched here as
    # commented-out code with no output path filled in.


# NOTE: an earlier commented-out prototype used to follow here. It parsed a
# hard-coded local .hml file, selected the BINDATA text with the same XPath,
# base64-decoded it, and wrote the bytes to "decoded_output_test.bin".
# binaryToChartxml supersedes it.