Add typo checking functionality and update scoring method to include typo results

2025-01-18 11:06:24 +09:00
parent faefe14f73
commit 0d22e3f146
1 changed files with 78 additions and 5 deletions
--- a/score5.py
+++ b/score5.py
@@ -1,4 +1,5 @@
 from datetime import datetime
 import difflib
 import json
 from pathlib import Path
 import os
@@ -272,9 +273,65 @@ class XMLScorer:
        return xml_data
    def typo_check(self, xml_path_origin, xml_path):
        """Compare the CHAR text of two XML files character by character.

        Both files are parsed, every text node found under a ``CHAR``
        element (excluding anything that has a ``HEADER`` ancestor) is
        collected, space characters are removed, and the two resulting
        strings are compared with ``difflib.ndiff``.

        NOTE(review): the elements returned by ``ET.parse(...).getroot()``
        must expose ``.xpath()``; presumably ``ET`` is ``lxml.etree`` --
        confirm against the file's imports.

        Args:
            xml_path_origin: Path of the reference file that ``xml_path``
                is compared against.
            xml_path: Path of the file being checked.

        Returns:
            A list of strings, one per difference, in diff order:
            ``'-x=>y'`` when a removed character ``x`` is immediately
            followed by an added character ``y``, ``'-x'`` for a removed
            character on its own, and ``'+y'`` for an added character on
            its own.
        """
        # Text nodes below CHAR elements, skipping anything inside HEADER.
        char_text_query = '//CHAR//text()[not(ancestor::HEADER)]'

        # The checked file is parsed first, then the reference file.
        root = ET.parse(xml_path).getroot()
        root_origin = ET.parse(xml_path_origin).getroot()

        checked_nodes = root.xpath(char_text_query)
        origin_nodes = root_origin.xpath(char_text_query)

        # Concatenate the text nodes and drop every space character.
        input_text_str = ''.join(checked_nodes).replace(' ', '')
        input_text_origin_str = ''.join(origin_nodes).replace(' ', '')

        print("input_text as string:")
        print(input_text_str)
        print("\ninput_text_origin as string:")
        print(input_text_origin_str)

        # ndiff over two strings yields one entry per character, each made of
        # a two-character tag ('- ', '+ ' or '  ') followed by the character.
        ops = list(difflib.ndiff(input_text_origin_str, input_text_str))
        total = len(ops)

        changes = []
        pos = 0
        while pos < total:
            entry = ops[pos]
            tag, char = entry[:2], entry[2:]
            pos += 1
            if tag == '- ':
                # A removal directly followed by an addition is reported as
                # one substitution, and the addition entry is consumed here.
                if pos < total and ops[pos].startswith('+ '):
                    changes.append('-' + char + '=>' + ops[pos][2:])
                    pos += 1
                else:
                    changes.append('-' + char)
            elif tag == '+ ':
                changes.append('+' + char)
        return changes
    # XML 파일 채점
-    def score_directory(self, xml_directory):
+    def score_directory(self, xml_directory, answer_path):
        # xml 파일 불러오기
        xml_files = Path(xml_directory).glob('*.hml')
@@ -282,9 +339,12 @@ class XMLScorer:
        # 결과 저장할 리스트
        results = []
        for xml_file in xml_files:
            result = {}
            chart_xml = self.binary_to_chartxml(xml_file)
-            result = self._score_xml_file(xml_file, chart_xml)
+            result['score'] = self._score_xml_file(xml_file, chart_xml)
            result['typo'] = self.typo_check(answer_path, xml_file)
            results.append(result)    
        return results
@@ -305,9 +365,11 @@ class XMLScorer:
        summary_data = []
        detail_data = []
        typo_data = []
-        for result in results:
+        for temp in results:
            # 요약 정보
            result = temp['score']
            summary_row = {
                '파일명': result['filename'],
                '총점': result.get('total_score', 0)
@@ -335,12 +397,20 @@ class XMLScorer:
        summary_df = pd.DataFrame(summary_data)
        detail_df = pd.DataFrame(detail_data).transpose()
        for temp in results:
            result = temp['typo']
            typo_data.append(result)
        type_df = pd.DataFrame(typo_data).transpose()
        # detail_df = pd.DataFrame(detail_data)
        # ExcelWriter 객체 생성
        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
            summary_df.to_excel(writer, sheet_name='채점결과요약', index=False)
            detail_df.to_excel(writer, sheet_name='채점상세내역', index=False)
            type_df.to_excel(writer, sheet_name='오타내역', index=False)
            # 열 너비 자동 조정
            # for sheet_name in writer.sheets:
@@ -366,11 +436,14 @@ def main():
    # xml(hml)파일 디렉토리 경로
    xml_directory = r'./output'
    # 오탈자 체크를 위한 정답 파일 경로
    answer_path = r'./output/정답.hml'
    # 채점 클래스 초기화
    scorer = XMLScorer(scoring_criteria_path)
    # 폴더 내 모든 xml 파일 채점
-    results = scorer.score_directory(xml_directory)
+    results = scorer.score_directory(xml_directory, answer_path)
 #   for result in results:
 #     print(f"\n파일: {result['filename']}")