머리말 채점 기준 수정

2025-09-09 15:14:48 +09:00
parent 12bf1c6419
commit 8ae46ef54d
19 changed files with 176 additions and 58 deletions
--- a/diwScoring2.py
+++ b/diwScoring2.py
@@ -133,10 +133,36 @@ class XMLScorer:
            
    # 하나의 XML 파일 채점
    def _score_xml_file(self, xml_file, chart_xml):
-        
+        # def parse_pages_by_bookmark(root):
+        #     """
+        #     P/TEXT/BOOKMARK 구조를 가진 XML에서 페이지 구간별 <P> 요소를 파싱하여 반환
+        #     """
+        #     pages = {}
+        #     all_p_tags = root.xpath('//P')
+
+        #     current_page = None
+        #     page_start_index = None
+
+        #     for i, p in enumerate(all_p_tags):
+        #         # BOOKMARK가 존재하는지 확인 (어디에 있든 탐색)
+        #         bookmarks = p.xpath('.//BOOKMARK')
+        #         for bm in bookmarks:
+        #             name = bm.get('Name')
+        #             if name and name.endswith('_start'):
+        #                 current_page = name.replace('_start', '')
+        #                 page_start_index = i
+        #             elif name and name.endswith('_end') and current_page is not None:
+        #                 page_end_index = i
+        #                 page_content = all_p_tags[page_start_index:page_end_index + 1]
+        #                 pages[current_page] = page_content
+        #                 current_page = None
+        #                 page_start_index = None
+
+        #     return pages
        def parse_pages_by_bookmark(root):
            """
-            P/TEXT/BOOKMARK 구조를 가진 XML에서 페이지 구간별 <p> 요소를 파싱하여 반환
+            BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의 <P> 요소들을
+            페이지 단위로 딕셔너리에 저장
            """
            pages = {}
            all_p_tags = root.xpath('//P')
@@ -145,15 +171,16 @@ class XMLScorer:
            page_start_index = None

            for i, p in enumerate(all_p_tags):
-                # BOOKMARK가 존재하는지 확인
-                bookmark = p.xpath('./TEXT/BOOKMARK')
-                if bookmark:
-                    name = bookmark[0].get('Name')
+                # P 안의 모든 BOOKMARK 탐색
+                bookmarks = p.xpath('.//BOOKMARK')
+                for bm in bookmarks:
+                    name = bm.get('Name')
                    if name and name.endswith('_start'):
                        current_page = name.replace('_start', '')
                        page_start_index = i
                    elif name and name.endswith('_end') and current_page is not None:
                        page_end_index = i
+                        # 시작~끝까지 P 태그 묶음 저장
                        page_content = all_p_tags[page_start_index:page_end_index + 1]
                        pages[current_page] = page_content
                        current_page = None
@@ -248,6 +275,25 @@ class XMLScorer:
                    }
                    
                    try:
+                        if "Header" in (category or ""):   
+                            def has_elements(ptags, xpath):
+                                for p in ptags:
+                                    element_list = p.xpath(xpath) if xpath else []
+                                    if element_list:
+                                        return True
+                                return False
+                        
+                            page1_ptags = pages.get('Page_1', [])
+                            page2_ptags = pages.get('Page_2', [])
+                            header_xpath = "//HEADER//P"
+                            has_page1_element = has_elements(page1_ptags, header_xpath)
+                            has_page2_element = has_elements(page2_ptags, header_xpath)
+                            
+                            if not has_page1_element or not has_page2_element:
+                                user_answer = None
+                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
+                                continue
+                            
                        if (category or "") == "PageSetting":
                            items = root.xpath(xpath)
                            error_range = criterion.get('tolerance', 0)
@@ -418,26 +464,7 @@ class XMLScorer:

                        # 정답이 하나인 경우
                        # elif (category or "") in ["OneAnswer", "ChartOneAnswer"]:
-                        elif "OneAnswer" in (category or ""):
-                            if "Header" in category:    
-                                def has_elements(ptags, xpath):
-                                    for p in ptags:
-                                        charshape_list = p.xpath(xpath) if xpath else []
-                                        if charshape_list:
-                                            return True
-                                    return False
-                            
-                                page1_ptags = pages.get('Page_1', [])
-                                page2_ptags = pages.get('Page_2', [])
-                                header_xpath = "//HEADER//P"
-                                has_page1_element = has_elements(page1_ptags, header_xpath)
-                                has_page2_element = has_elements(page2_ptags, header_xpath)
-                                
-                                if not has_page1_element or not has_page2_element:
-                                    user_answer = ""
-                                    self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
-                                    continue
-                                
+                        elif "OneAnswer" in (category or ""):                                
                            items = root.xpath(xpath) if xpath else []
                            items2 = root.xpath(xpath2) if xpath2 else []
                            
@@ -580,26 +607,6 @@ class XMLScorer:
                        
                        # 폰트명 
                        elif "FontName" in (category or ""):
-                            # 'DIAT' 머릿말 문항 1,2페이지 둘 중 하나라도 없으면 0점 처리
-                            if "Header" in category:    
-                                def has_charshape(ptags, xpath):
-                                    for p in ptags:
-                                        charshape_list = p.xpath(xpath) if xpath else []
-                                        if charshape_list:
-                                            return True
-                                    return False
-                            
-                                page1_ptags = pages.get('Page_1', [])
-                                page2_ptags = pages.get('Page_2', [])
-                                header_xpath = "//HEADER//P"
-                                has_page1_element = has_charshape(page1_ptags, header_xpath)
-                                has_page2_element = has_charshape(page2_ptags, header_xpath)
-                                
-                                if not has_page1_element or not has_page2_element:
-                                    user_answer = ""
-                                    self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
-                                    continue
-                                        
                            charshape_list = root.xpath(xpath)

                            # 문자속성이 없는 경우
@@ -1313,19 +1320,19 @@ class XMLScorer:
 def main():

    # 시험회차 및 유형
-    # exam_round = '2508'
-    exam_round = '2522'
+    exam_round = '2508'
+    # exam_round = '2522'
    
    # 채점하고자 하는 유형은 주석 해제
    exam_types = [
-        'A',
-        'B',
-        'C',
-        # 'D',
+        # 'A',
+        # 'B',
+        # 'C',
+        'D',
    ]

-    test_mode = False
-    # test_mode = True #/TEST 폴더 채점시 
+    # test_mode = False
+    test_mode = True #/TEST 폴더 채점시 
    
    output_excel_paths = []
    for exam_type in exam_types: