From 0d22e3f14661c9a4030a36ccd2192650bc8e525b Mon Sep 17 00:00:00 2001
From: waterdrw
Date: Sat, 18 Jan 2025 11:06:24 +0900
Subject: [PATCH] Add typo checking functionality and update scoring method to include typo results

---
Review notes (placed after the '---' separator, so not part of the commit message):
* typo_check() compares the space-stripped CHAR text of a scored file with
  the answer file via difflib.ndiff and returns per-character differences
  as '-x' (only in the answer file), '+y' (only in the scored file) or
  '-x=>y' (a removal immediately followed by an addition).
* The local `next` shadowed the builtin and is now `next_char`; the
  misspelled `type_df` is now `typo_df`.
* Comments on added lines are in English; context lines are untouched so
  the hunks keep matching the existing score5.py.

 score5.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 5 deletions(-)

diff --git a/score5.py b/score5.py
index ad9e5f9..deeee3f 100644
--- a/score5.py
+++ b/score5.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+import difflib
 import json
 from pathlib import Path
 import os
@@ -272,19 +273,78 @@ class XMLScorer:
 
         return xml_data
 
+    def typo_check(self, xml_path_origin, xml_path):
+        tree = ET.parse(xml_path)
+        root = tree.getroot()
+        tree_origin = ET.parse(xml_path_origin)
+        root_origin = tree_origin.getroot()
+
+        # Use XPath to collect the text nodes under CHAR, skipping anything inside HEADER
+        input_text = root.xpath('//CHAR//text()[not(ancestor::HEADER)]')
+        input_text_origin = root_origin.xpath('//CHAR//text()[not(ancestor::HEADER)]')
+
+        # Remove spaces from every text node
+        input_text = [text.replace(' ', '') for text in input_text]
+        input_text_origin = [text.replace(' ', '') for text in input_text_origin]
+
+
+        # Join each list into a single string
+        input_text_str = ''.join(input_text)
+        input_text_origin_str = ''.join(input_text_origin)
+
+        print("input_text as string:")
+        print(input_text_str)
+        print("\ninput_text_origin as string:")
+        print(input_text_origin_str)
+
+
+        # Character-by-character comparison of the two strings
+        diff = difflib.ndiff(input_text_origin_str, input_text_str)
+        diff_list = list(diff)
+
+        # Condense the differences into result_diff
+        result_diff = []
+        skip_next = False
+
+        for i, line in enumerate(diff_list):
+            if skip_next:
+                skip_next = False
+                continue
+            # A '-' entry directly followed by a '+' entry is merged and reported as one substitution (typo)
+            if line.startswith('- '):
+                if i + 1 < len(diff_list) and diff_list[i + 1].startswith('+ '):
+                    line = line.replace('- ', '-')
+                    next_char = diff_list[i + 1].replace('+ ', '')
+                    result_diff.append(line+'=>'+next_char)
+                    skip_next = True
+                else:
+                    line = line.replace('- ', '-')
+                    result_diff.append(line)
+            elif line.startswith('+ '):
+                line = line.replace('+ ', '+')
+                result_diff.append(line)
+
+        # Print result_diff (debug output, disabled)
+        # print("\nResult Differences:")
+        # for diff in result_diff:
+        #     print(diff)
+        return result_diff
 
 
     # XML 파일 채점
-    def score_directory(self, xml_directory):
+    def score_directory(self, xml_directory, answer_path):
         # xml 파일 불러오기
         xml_files = Path(xml_directory).glob('*.hml')
 
         # 결과 저장할 리스트
         results = []
-        
+
+
         for xml_file in xml_files:
+            result = {}
             chart_xml = self.binary_to_chartxml(xml_file)
-            result = self._score_xml_file(xml_file, chart_xml)
+            result['score'] = self._score_xml_file(xml_file, chart_xml)
+            result['typo'] = self.typo_check(answer_path, xml_file)
             results.append(result)
 
         return results
@@ -305,9 +365,11 @@ class XMLScorer:
 
         summary_data = []
         detail_data = []
+        typo_data = []
 
-        for result in results:
+        for temp in results:
             # 요약 정보
+            result = temp['score']
             summary_row = {
                 '파일명': result['filename'],
                 '총점': result.get('total_score', 0)
@@ -335,12 +397,20 @@ class XMLScorer:
 
         summary_df = pd.DataFrame(summary_data)
         detail_df = pd.DataFrame(detail_data).transpose()
+
+
+        for temp in results:
+            result = temp['typo']
+            typo_data.append(result)
+
+        typo_df = pd.DataFrame(typo_data).transpose()
         # detail_df = pd.DataFrame(detail_data)
 
         # ExcelWriter 객체 생성
         with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
             summary_df.to_excel(writer, sheet_name='채점결과요약', index=False)
             detail_df.to_excel(writer, sheet_name='채점상세내역', index=False)
+            typo_df.to_excel(writer, sheet_name='오타내역', index=False)
 
             # 열 너비 자동 조정
             # for sheet_name in writer.sheets:
@@ -365,12 +435,15 @@ def main():
 
     # xml(hml)파일 디렉토리 경로
     xml_directory = r'./output'
+
+    # Path to the answer file used for typo checking
+    answer_path = r'./output/정답.hml'
 
     # 채점 클래스 초기화
     scorer = XMLScorer(scoring_criteria_path)
 
     # 폴더 내 모든 xml 파일 채점
-    results = scorer.score_directory(xml_directory)
+    results = scorer.score_directory(xml_directory, answer_path)
 
     # for result in results:
     #     print(f"\n파일: {result['filename']}")