score5.py > diwScoring.py 변경 / JSON파일 구조 변경에 따라 일부 소스코드 변경

2025-04-17 17:24:12 +09:00
parent 148437c97a
commit d6ef894fc6
11 changed files with 6595 additions and 2590 deletions
--- a/score5.py
+++ b/score5.py
@@ -139,7 +139,7 @@ class XMLScorer:
            else:
                chart_tree = ET.fromstring(chart_xml)
            
-            total_score = 0
+            total_score = 0 
            partial_score = 0
            previous_first_digit = None
            
@@ -308,29 +308,29 @@ class XMLScorer:
        
        return xml_data
    
-    def typo_check(self, xml_path_origin, xml_path):
-        tree = ET.parse(xml_path)
-        root = tree.getroot()
-        tree_origin = ET.parse(xml_path_origin)
-        root_origin = tree_origin.getroot()
+    def typo_check(self, correct_answer_file, user_answer_file):
+        user_answer_tree = ET.parse(user_answer_file)
+        user_answer_root = user_answer_tree.getroot()
+        correct_answer_tree = ET.parse(correct_answer_file)
+        correct_answer_root = correct_answer_tree.getroot()

        # xpath로 바이너리 부분추출
-        input_text = root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
-        table_text = root.xpath('//TABLE//CHAR//text()')
-        input_text += table_text
+        user_input_text = user_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
+        user_table_text = user_answer_root.xpath('//TABLE//CHAR//text()')
+        user_input_text += user_table_text
        
-        input_text_origin = root_origin.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
-        table_text_origin = root_origin.xpath('//TABLE//CHAR//text()')
-        input_text_origin += table_text_origin
+        correct_input_text = correct_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
+        correct_table_text = correct_answer_root.xpath('//TABLE//CHAR//text()')
+        correct_input_text += correct_table_text

        # 각 요소에서 공백 제거
-        input_text = [text.replace(' ', '') for text in input_text]
-        input_text_origin = [text.replace(' ', '') for text in input_text_origin]
+        user_input_text = [text.replace(' ', '') for text in user_input_text]
+        correct_input_text = [text.replace(' ', '') for text in correct_input_text]

        
        # 숫자와 특정 형식 제거 (예: 1., 2., 3., -)
-        input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in input_text]
-        input_text_origin = [re.sub(r'\d+\.\s*|-', '', text) for text in input_text_origin]
+        user_input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in user_input_text]
+        correct_input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in correct_input_text]
    
        ignore_word = self.scoring_criteria["2-29"]['path'].split("'")[1]
        print(f"ignore_word: {ignore_word}")
@@ -338,21 +338,21 @@ class XMLScorer:
        # 오타와 누락의 경우만 판단하면 정상작동하지만
        # 추가 된 단어의 경우를 채점기준에 추가하면 정확하게 채점 되지 않을 수 있음
        # [정답] Hybrid [실제작성] 
-        input_text = [text.replace(ignore_word, '') for text in input_text]
-        input_text_origin = [text.replace(ignore_word, '') for text in input_text_origin]
+        user_input_text = [text.replace(ignore_word, '') for text in user_input_text]
+        correct_input_text = [text.replace(ignore_word, '') for text in correct_input_text]
        
        # 리스트를 하나의 문자열로 변경
-        input_text_str = ''.join(input_text)
-        input_text_origin_str = ''.join(input_text_origin)
+        user_input_text_str = ''.join(user_input_text)
+        correct_input_text_str = ''.join(correct_input_text)

-        print("input_text as string:")
-        print(input_text_str)
-        print("\ninput_text_origin as string:")
-        print(input_text_origin_str)
+        print("user_input_text as string:")
+        print(user_input_text_str)
+        print("\ncorrect_input_text_answer as string:")
+        print(correct_input_text_str)


        # 문자열의 차이를 비교
-        diff = difflib.ndiff(input_text_origin_str, input_text_str)
+        diff = difflib.ndiff(correct_input_text_str, user_input_text_str)
        diff_list = list(diff)

        # 차이점을 정리하여 result_diff에 저장
@@ -412,18 +412,18 @@ class XMLScorer:
        return result_diff
    
    # XML 파일 채점
-    def score_directory(self, xml_directory, answer_path):
+    def score_directory(self, xml_directory, correct_answer_file):
        # xml 파일 불러오기
        xml_files = Path(xml_directory).glob('*.hml')
    
        # 결과 저장할 리스트
        results = []
        
-        for xml_file in xml_files:
+        for user_answer_file in xml_files:
            result = {}
-            chart_xml = self.binary_to_chartxml(xml_file)
-            result['typo'] = self.typo_check(answer_path, xml_file)
-            result['score'] = self._score_xml_file(xml_file, chart_xml)
+            chart_xml = self.binary_to_chartxml(user_answer_file)
+            result['typo'] = self.typo_check(correct_answer_file, user_answer_file)
+            result['score'] = self._score_xml_file(user_answer_file, chart_xml)
            # result['score']['score_results'][2]['points'] = result['typo'][0]
            results.append(result)    
        return results
@@ -559,7 +559,7 @@ def main():
        

        # 오탈자 체크를 위한 정답 파일 경로 (형식:DIW_2503A.hml)
-        answer_path = f'./output/{exam_type}/DIW/DIW_{exam_round}{exam_type}.hml'
+        correct_answer_file = f'./output/{exam_type}/DIW/DIW_{exam_round}{exam_type}.hml'
        
        # 엑셀 파일명 (비어있으면 자동생성)
        timestamp = datetime.now().strftime("%y%m%d")
@@ -569,7 +569,7 @@ def main():
        scorer = XMLScorer(scoring_criteria_path)

        # 폴더 내 모든 xml 파일 채점
-        results = scorer.score_directory(xml_directory, answer_path)
+        results = scorer.score_directory(xml_directory, correct_answer_file)
    
        # 채점 결과 엑셀로 저장
        output_excel_paths.append(scorer.export_to_excel(results, output_path))