From a88e3603279bd64f15c93f3b5e2c8ef8af0035de Mon Sep 17 00:00:00 2001 From: waterdrw Date: Sat, 18 Jan 2025 00:13:20 +0900 Subject: [PATCH] =?UTF-8?q?XML=20=EB=82=B4=EC=9A=A9=20=EC=A4=91=20Chart=20?= =?UTF-8?q?XML=20=EB=82=B4=EC=9A=A9=EC=9D=B4=20=EC=97=86=EC=9C=BC=EB=A9=B4?= =?UTF-8?q?=200=EC=A0=90=20=EC=B2=98=EB=A6=AC=EB=A1=9C=20=EB=84=98?= =?UTF-8?q?=EC=96=B4=EA=B0=80=EB=8F=84=EB=A1=9D=20=EB=A1=9C=EC=A7=81=20?= =?UTF-8?q?=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- score5.py | 94 ++++++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 50 deletions(-) diff --git a/score5.py b/score5.py index 0f9c899..95cb5b8 100644 --- a/score5.py +++ b/score5.py @@ -52,14 +52,13 @@ class XMLScorer: return None def chart_query_xml(self, tree, xpath, namespaces): - try: - result = tree.xpath(xpath, namespaces=namespaces) - if type(result) is list and len(result) == 0: - return None - - return result - except ET.XPathEvalError as e: + + result = tree.xpath(xpath, namespaces=namespaces) + if type(result) is list and len(result) == 0: return None + + return result + # 유사한 텍스트 찾기 def find_similar_text(self, root, target_text, threshold=0.5): @@ -109,7 +108,7 @@ class XMLScorer: } if chart_xml is None: - chart_tree = ET.fromstring('') + chart_tree = ET.fromstring('') else: chart_tree = ET.fromstring(chart_xml) @@ -235,47 +234,42 @@ class XMLScorer: } def binary_to_chartxml(self, xml_path): - try: - print(f'binary_to_chartxml {xml_path}') - tree = ET.parse(xml_path) - root = tree.getroot() - - binary_data = root.xpath('//BINDATA[@Id=//BINITEM[@Format="OLE"]/@BinData]/text()') - if not binary_data: - raise ValueError("No binary data found in the XML.") - binary_data = binary_data[0].encode('utf-8') - - # 태그와 그 내부 내용을 삭제합니다. - encoded_data = re.sub(b'', b'', binary_data) - encoded_data = encoded_data.replace(b'', b'') - encoded_data = encoded_data.replace(b'\r\n', b'') - - # base64 디코딩을 수행합니다. - decoded_data = base64.b64decode(encoded_data+b'==') - - # 디코딩된 데이터 내용 중 xml 형식만 추출할 때 , 사이의 데이터만 추출. - start = decoded_data.find(b'') - print(end) - xml_data = decoded_data[start:end+len(b'')] - - # 디코딩된 데이터를 파일로 저장합니다. - base_filename = os.path.splitext(xml_path)[0] - new_filename = f'{base_filename}.xml' - with open(new_filename, 'wb') as file: - file.write(xml_data) - - return xml_data - except ET.ParseError as e: - print(f"XML 파싱 오류: {str(e)}") - except IndexError as e: - print(f"IndexError: {str(e)}") - except ValueError as e: - print(f"ValueError: {str(e)}") - except Exception as e: - print(f"Unexpected error: {str(e)}") + print(f'binary_to_chartxml {xml_path}') + tree = ET.parse(xml_path) + root = tree.getroot() + + binary_data = root.xpath('//BINDATA[@Id=//BINITEM[@Format="OLE"]/@BinData]/text()') + if not binary_data: + return None + binary_data = binary_data[0].encode('utf-8') + + # 태그와 그 내부 내용을 삭제합니다. + encoded_data = re.sub(b'', b'', binary_data) + encoded_data = encoded_data.replace(b'', b'') + encoded_data = encoded_data.replace(b'\r\n', b'') + + # base64 디코딩을 수행합니다. + decoded_data = base64.b64decode(encoded_data+b'==') + + # 디코딩된 데이터 내용 중 xml 형식만 추출할 때 , 사이의 데이터만 추출. + start = decoded_data.find(b'') + print(end) + xml_data = decoded_data[start:end+len(b'')] + + if -1 in [start, end]: + return None + + # 디코딩된 데이터를 파일로 저장합니다. + base_filename = os.path.splitext(xml_path)[0] + new_filename = f'{base_filename}.xml' + with open(new_filename, 'wb') as file: + file.write(xml_data) + + return xml_data + # XML 파일 채점 def score_directory(self, xml_directory): @@ -365,10 +359,10 @@ class XMLScorer: def main(): - scoring_criteria_path = r'C:\Users\dra\project\HWP-Scoring\scoring_criteria.json' + scoring_criteria_path = r'./scoring_criteria.json' # xml(hml)파일 디렉토리 경로 - xml_directory = r'C:\Users\dra\project\HWP-Scoring\output' + xml_directory = r'./output' # 채점 클래스 초기화 scorer = XMLScorer(scoring_criteria_path)