diff --git a/score5.py b/score5.py index 0f9c899..95cb5b8 100644 --- a/score5.py +++ b/score5.py @@ -52,14 +52,13 @@ class XMLScorer: return None def chart_query_xml(self, tree, xpath, namespaces): - try: - result = tree.xpath(xpath, namespaces=namespaces) - if type(result) is list and len(result) == 0: - return None - - return result - except ET.XPathEvalError as e: + + result = tree.xpath(xpath, namespaces=namespaces) + if type(result) is list and len(result) == 0: return None + + return result + # 유사한 텍스트 찾기 def find_similar_text(self, root, target_text, threshold=0.5): @@ -109,7 +108,7 @@ class XMLScorer: } if chart_xml is None: - chart_tree = ET.fromstring('') + chart_tree = ET.fromstring('') else: chart_tree = ET.fromstring(chart_xml) @@ -235,47 +234,42 @@ class XMLScorer: } def binary_to_chartxml(self, xml_path): - try: - print(f'binary_to_chartxml {xml_path}') - tree = ET.parse(xml_path) - root = tree.getroot() - - binary_data = root.xpath('//BINDATA[@Id=//BINITEM[@Format="OLE"]/@BinData]/text()') - if not binary_data: - raise ValueError("No binary data found in the XML.") - binary_data = binary_data[0].encode('utf-8') - - # 태그와 그 내부 내용을 삭제합니다. - encoded_data = re.sub(b'', b'', binary_data) - encoded_data = encoded_data.replace(b'', b'') - encoded_data = encoded_data.replace(b'\r\n', b'') - - # base64 디코딩을 수행합니다. - decoded_data = base64.b64decode(encoded_data+b'==') - - # 디코딩된 데이터 내용 중 xml 형식만 추출할 때 , 사이의 데이터만 추출. - start = decoded_data.find(b'') - print(end) - xml_data = decoded_data[start:end+len(b'')] - - # 디코딩된 데이터를 파일로 저장합니다. - base_filename = os.path.splitext(xml_path)[0] - new_filename = f'{base_filename}.xml' - with open(new_filename, 'wb') as file: - file.write(xml_data) - - return xml_data - except ET.ParseError as e: - print(f"XML 파싱 오류: {str(e)}") - except IndexError as e: - print(f"IndexError: {str(e)}") - except ValueError as e: - print(f"ValueError: {str(e)}") - except Exception as e: - print(f"Unexpected error: {str(e)}") + print(f'binary_to_chartxml {xml_path}') + tree = ET.parse(xml_path) + root = tree.getroot() + + binary_data = root.xpath('//BINDATA[@Id=//BINITEM[@Format="OLE"]/@BinData]/text()') + if not binary_data: + return None + binary_data = binary_data[0].encode('utf-8') + + # 태그와 그 내부 내용을 삭제합니다. + encoded_data = re.sub(b'', b'', binary_data) + encoded_data = encoded_data.replace(b'', b'') + encoded_data = encoded_data.replace(b'\r\n', b'') + + # base64 디코딩을 수행합니다. + decoded_data = base64.b64decode(encoded_data+b'==') + + # 디코딩된 데이터 내용 중 xml 형식만 추출할 때 , 사이의 데이터만 추출. + start = decoded_data.find(b'') + print(end) + xml_data = decoded_data[start:end+len(b'')] + + if -1 in [start, end]: + return None + + # 디코딩된 데이터를 파일로 저장합니다. + base_filename = os.path.splitext(xml_path)[0] + new_filename = f'{base_filename}.xml' + with open(new_filename, 'wb') as file: + file.write(xml_data) + + return xml_data + # XML 파일 채점 def score_directory(self, xml_directory): @@ -365,10 +359,10 @@ class XMLScorer: def main(): - scoring_criteria_path = r'C:\Users\dra\project\HWP-Scoring\scoring_criteria.json' + scoring_criteria_path = r'./scoring_criteria.json' # xml(hml)파일 디렉토리 경로 - xml_directory = r'C:\Users\dra\project\HWP-Scoring\output' + xml_directory = r'./output' # 채점 클래스 초기화 scorer = XMLScorer(scoring_criteria_path)