From 0d22e3f14661c9a4030a36ccd2192650bc8e525b Mon Sep 17 00:00:00 2001
From: waterdrw <waterdrw@gmail.com>
Date: Sat, 18 Jan 2025 11:06:24 +0900
Subject: [PATCH] Add typo checking functionality and update scoring method to
 include typo results

---
 score5.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 5 deletions(-)

diff --git a/score5.py b/score5.py
index ad9e5f9..deeee3f 100644
--- a/score5.py
+++ b/score5.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+import difflib
 import json
 from pathlib import Path
 import os
@@ -272,19 +273,78 @@ class XMLScorer:
         
         return xml_data
     
+    def typo_check(self, xml_path_origin, xml_path):
+        """Compare the body text of a document with the reference document.
+
+        Both files are parsed with ``ET.parse``.  Every text node below a
+        ``CHAR`` element that has no ``HEADER`` ancestor is collected, ASCII
+        spaces are removed, and the pieces are joined into one string per
+        file.  The two strings are then diffed character by character.
+
+        Args:
+            xml_path_origin: Path of the reference (answer) document.
+            xml_path: Path of the document being checked.
+
+        Returns:
+            A list of strings: ``"-a=>b"`` when a removed character is
+            directly followed by an added one, ``"-a"`` for any other
+            removal and ``"+b"`` for any other addition.
+        """
+        body_xpath = '//CHAR//text()[not(ancestor::HEADER)]'
+
+        # The parsed roots must support .xpath(); ET is imported elsewhere.
+        checked_root = ET.parse(xml_path).getroot()
+        origin_root = ET.parse(xml_path_origin).getroot()
+
+        input_text_str = ''.join(
+            piece.replace(' ', '') for piece in checked_root.xpath(body_xpath)
+        )
+        input_text_origin_str = ''.join(
+            piece.replace(' ', '') for piece in origin_root.xpath(body_xpath)
+        )
+
+        print("input_text as string:")
+        print(input_text_str)
+        print("\ninput_text_origin as string:")
+        print(input_text_origin_str)
+
+        # Character-level diff; entries are prefixed with '- ', '+ ' or '  '.
+        entries = list(difflib.ndiff(input_text_origin_str, input_text_str))
+
+        result_diff = []
+        position = 0
+        while position < len(entries):
+            entry = entries[position]
+            position += 1
+            if entry.startswith('- '):
+                removed = entry.replace('- ', '-')
+                follower = entries[position] if position < len(entries) else ''
+                if follower.startswith('+ '):
+                    # A removal directly followed by an addition is reported
+                    # as one substitution, and the addition is consumed.
+                    result_diff.append(removed + '=>' + follower.replace('+ ', ''))
+                    position += 1
+                else:
+                    result_diff.append(removed)
+            elif entry.startswith('+ '):
+                result_diff.append(entry.replace('+ ', '+'))
+        return result_diff
     
     # XML 파일 채점
-    def score_directory(self, xml_directory):
+    def score_directory(self, xml_directory, answer_path):
         
         # xml 파일 불러오기
         xml_files = Path(xml_directory).glob('*.hml')
     
         # 결과 저장할 리스트
         results = []
-    
+        # NOTE(review): an answer_path inside xml_directory also matches the
+        # glob above, so it is scored and diffed against itself - confirm.
         for xml_file in xml_files:
+            result = {}  # per-file record with the keys 'score' and 'typo'
             chart_xml = self.binary_to_chartxml(xml_file)
-            result = self._score_xml_file(xml_file, chart_xml)
+            result['score'] = self._score_xml_file(xml_file, chart_xml)
+            result['typo'] = self.typo_check(answer_path, xml_file)
             results.append(result)    
         return results
 
@@ -305,9 +365,11 @@ class XMLScorer:
 
         summary_data = []
         detail_data = []
+        typo_data = []
         
-        for result in results:
+        for temp in results:
             # 요약 정보
+            result = temp['score']
             summary_row = {
                 '파일명': result['filename'],
                 '총점': result.get('total_score', 0)
@@ -335,12 +397,20 @@ class XMLScorer:
 
         summary_df = pd.DataFrame(summary_data)
         detail_df = pd.DataFrame(detail_data).transpose()
+        
+        
+        for temp in results:
+            result = temp['typo']
+            typo_data.append(result)
+            
+        type_df = pd.DataFrame(typo_data).transpose()
         # detail_df = pd.DataFrame(detail_data)
 
         # ExcelWriter 객체 생성
         with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
             summary_df.to_excel(writer, sheet_name='채점결과요약', index=False)
             detail_df.to_excel(writer, sheet_name='채점상세내역', index=False)
+            type_df.to_excel(writer, sheet_name='오타내역', index=False)
 
             # 열 너비 자동 조정
             # for sheet_name in writer.sheets:
@@ -365,12 +435,15 @@ def main():
 
     # xml(hml)파일 디렉토리 경로
     xml_directory = r'./output'
+    
+    # 오탈자 체크를 위한 정답 파일 경로
+    answer_path = r'./output/정답.hml'
 
     # 채점 클래스 초기화
     scorer = XMLScorer(scoring_criteria_path)
 
     # 폴더 내 모든 xml 파일 채점
-    results = scorer.score_directory(xml_directory)
+    results = scorer.score_directory(xml_directory, answer_path)
   
 #   for result in results:
 #     print(f"\n파일: {result['filename']}")