[2-4~13] 2페이지 글상자 채점 기준 수정

2025-09-12 16:55:30 +09:00
parent 6698bca8e3
commit 06a07198d5
6 changed files with 176 additions and 82 deletions
--- a/diwScoring2.py
+++ b/diwScoring2.py
@@ -135,32 +135,77 @@ class XMLScorer:
    def _score_xml_file(self, xml_file, chart_xml):
        def parse_pages_by_bookmark(root):
            """
-            BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의 <P> 요소들을
-            페이지 단위로 딕셔너리에 저장
+            BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의 최상위 블록 요소들을
+            페이지 단위로 딕셔너리에 저장하여 XML 구조를 유지합니다.
+
+            Args:
+                root: lxml etree의 루트 요소
+
+            Returns:
+                dict: 페이지 이름을 키로, 해당 페이지의 lxml 요소 리스트를 값으로 하는 딕셔너리
            """
            pages = {}
-            all_p_tags = root.xpath('//P')
+            
+            # 1. 'SECTION'의 직계 자식들만 가져와 평탄화를 방지합니다.
+            #    이것들이 문서의 최상위 구조를 이루는 요소들입니다 (P, TABLE 등).
+            all_top_blocks = root.xpath('//SECTION/*')

            current_page = None
            page_start_index = None

-            for i, p in enumerate(all_p_tags):
-                # P 안의 모든 BOOKMARK 탐색
-                bookmarks = p.xpath('.//BOOKMARK')
-                for bm in bookmarks:
-                    name = bm.get('Name')
-                    if name and name.endswith('_start'):
-                        current_page = name.replace('_start', '')
-                        page_start_index = i
-                    elif name and name.endswith('_end') and current_page is not None:
-                        page_end_index = i
-                        # 시작~끝까지 P 태그 묶음 저장
-                        page_content = all_p_tags[page_start_index:page_end_index + 1]
-                        pages[current_page] = page_content
-                        current_page = None
-                        page_start_index = None
+            # 2. 이 최상위 블록 리스트를 순회합니다.
+            for i, block in enumerate(all_top_blocks):
+                # 3. 현재 블록 '내부'에서 시작 북마크를 탐색합니다. ('.//' 사용)
+                start_bookmarks = block.xpath('.//BOOKMARK[contains(@Name, "_start")]')
+                if start_bookmarks:
+                    # 여러 북마크가 있을 수 있으니 첫 번째 것을 기준으로 합니다.
+                    name = start_bookmarks[0].get('Name')
+                    current_page = name.replace('_start', '')
+                    page_start_index = i

+                # 4. 현재 페이지를 찾는 중이고, 끝 북마크를 찾았을 경우
+                #    f-string을 사용하여 정확한 끝 북마크 이름을 찾습니다.
+                if current_page is not None and block.xpath(f'.//BOOKMARK[@Name="{current_page}_end"]'):
+                    page_end_index = i
+                    
+                    # 5. 시작 인덱스부터 끝 인덱스까지 'all_top_blocks' 리스트를 슬라이싱합니다.
+                    #    이 슬라이스에는 중첩 구조가 온전히 보존된 최상위 요소들이 담깁니다.
+                    page_content = all_top_blocks[page_start_index : page_end_index + 1]
+                    pages[current_page] = page_content
+                    
+                    # 상태 초기화
+                    current_page = None
+                    page_start_index = None
+                    
            return pages
+        # def parse_pages_by_bookmark(root):
+        #     """
+        #     BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의 <P> 요소들을
+        #     페이지 단위로 딕셔너리에 저장
+        #     """
+        #     pages = {}
+        #     all_p_tags = root.xpath('//P')
+
+        #     current_page = None
+        #     page_start_index = None
+
+        #     for i, p in enumerate(all_p_tags):
+        #         # P 안의 모든 BOOKMARK 탐색
+        #         bookmarks = p.xpath('.//BOOKMARK')
+        #         for bm in bookmarks:
+        #             name = bm.get('Name')
+        #             if name and name.endswith('_start'):
+        #                 current_page = name.replace('_start', '')
+        #                 page_start_index = i
+        #             elif name and name.endswith('_end') and current_page is not None:
+        #                 page_end_index = i
+        #                 # 시작~끝까지 P 태그 묶음 저장
+        #                 page_content = all_p_tags[page_start_index:page_end_index + 1]
+        #                 pages[current_page] = page_content
+        #                 current_page = None
+        #                 page_start_index = None
+
+        #     return pages
    
        def extract_char_text_from_p(p_element):
            """
@@ -249,6 +294,30 @@ class XMLScorer:
                    }
                    
                    try:
+                        has_page2_rectangle = False
+                        if "Rectangle" in (category or ""):
+                            def has_elements(ptags, xpath):
+                                for p in ptags:
+                                    element_list = p.xpath(xpath) if xpath else []
+                                    if element_list:
+                                        return True
+                                return False
+                            
+                            page2_ptags = pages.get('Page_2', [])
+                            rectangle_xpath = ".//RECTANGLE"
+                            has_page2_rectangle = has_elements(page2_ptags, rectangle_xpath)
+                            
+                            if not has_page2_rectangle:
+                                user_answer = None
+                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
+                                continue
+                            else:
+                                rect_charshapes = []
+                                rect_parashapes = []
+                                for p in page2_ptags:
+                                    rect_charshapes.extend(p.xpath(".//RECTANGLE//TEXT/@CharShape"))
+                                    rect_parashapes.extend(p.xpath(".//RECTANGLE//P/@ParaShape"))
+                                    
                        # 머릿말과 관련된 문항에서 1페이지에 머릿말이 없는 경우의 처리
                        # [1-25, 26, 27] 문항 'DIAT' 머릿말 채점시 1페이지에 머릿말이 없으면
                        # 채점하지 않고 0점 처리
@@ -269,24 +338,6 @@ class XMLScorer:
                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                                continue
                        
-                        has_page2_rectangle = False
-                        if "Rectangle" in (category or ""):
-                            def has_elements(ptags, xpath):
-                                for p in ptags:
-                                    element_list = p.xpath(xpath) if xpath else []
-                                    if element_list:
-                                        return True
-                                return False
-                            
-                            page2_ptags = pages.get('Page_2', [])
-                            rectangle_xpath = ".//RECTANGLE"
-                            has_page2_rectangle = has_elements(page2_ptags, rectangle_xpath)
-                            
-                            if not has_page2_rectangle:
-                                user_answer = None
-                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
-                                continue
-                            
                        if (category or "") == "PageSetting":
                            items = root.xpath(xpath)
                            error_range = criterion.get('tolerance', 0)
@@ -458,20 +509,19 @@ class XMLScorer:
                        # 2페이지의 글상자의 ParaShape ID를 동적으로 찾아서 채점
                        elif "TextBoxAlign" in (category or ""):
                            if has_page2_rectangle:
-                                # 2페이지 내에서만 검색
-                                search_root = etree.Element("Page_2")
-                                for p in page2_ptags:
-                                    search_root.append(p)
-                                rectangle_parashape_id = search_root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape")
-
+                                parashape_list = rect_parashapes
+                                
                            else:
-                                # 전체 root에서 검색
-                                rectangle_parashape_id = root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape")
+                                parashape_list = root.xpath(xpath)
                            
-                            # ParaShape ID가 있는 경우에만 xpath 치환 & 실행
-                            if rectangle_parashape_id:
-                                xpath = xpath.replace('{rectangle_parashape_id}', rectangle_parashape_id[0])
-                                items = root.xpath(xpath)
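+                            # Substitute each ParaShape ID into the xpath and run it. NOTE(review): result_items is collected here but is not read in the visible part of this branch - confirm the scoring loop uses it rather than items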
+                            result_items = []
+                            if parashape_list:
+                                for parashape_id in parashape_list:
+                                    exec_xpath = xpath.replace('{rect_parashape_id}', parashape_id)
+                                    items = root.xpath(exec_xpath)
+                                    result_items.extend(items)
+                                    
                            else:
                                # RECTANGLE이 없으면 items는 빈 리스트
                                items = [None]
@@ -482,16 +532,26 @@ class XMLScorer:
                                if scoring['points'] > 0:
                                    break
                                
+                            # if has_page2_rectangle:
+                            #     # 2페이지 내에서만 검색
+                            #     search_root = etree.Element("Page_2")
+                            #     for p in page2_ptags:
+                            #         search_root.append(p)
+                            #     rect_parashape_id = search_root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape")
+
+                            # else:
+                            #     # 전체 root에서 검색
+                            #     rect_parashape_id = root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape")
+                                
                        # 정답이 하나인 경우
                        # elif (category or "") in ["OneAnswer", "ChartOneAnswer"]:
-                        elif "OneAnswer" in (category or ""):                     
+                        elif "OneAnswer" in (category or ""):             
+                            items = []
+                            items2 = []        
                            if has_page2_rectangle:
-                                search_root = etree.Element("Page_2")
                                for p in page2_ptags:
-                                    search_root.append(p)
-                                items = search_root.xpath(xpath) if xpath else []
-                                items2 = search_root.xpath(xpath2) if xpath2 else []
-                                
+                                    items.extend(p.xpath(xpath) if xpath else "")
+                                    items2.extend(p.xpath(xpath2) if xpath2 else "")
                            else:      
                                items = root.xpath(xpath) if xpath else []
                                items2 = root.xpath(xpath2) if xpath2 else []
@@ -527,12 +587,10 @@ class XMLScorer:
                        
                        # [2-6] 테두리 이중실선 1.00mm
                        elif "LineShape" in (category or ""):
+                            line_shapes = []
                            if has_page2_rectangle:
-                                search_root = etree.Element("Page_2")
                                for p in page2_ptags:
-                                    search_root.append(p)
-                                line_shapes = search_root.xpath(xpath) if xpath else []
-                                
+                                    line_shapes.extend(p.xpath(xpath) if xpath else [])
                            else:
                                line_shapes = root.xpath(xpath) if xpath else []
                            
@@ -555,13 +613,26 @@ class XMLScorer:
                        # 사용자 입력값이 mm단위인 경우
                        # elif (category or "") == "mmSize":
                        elif "mmSize" in (category or ""):
+                            items = []
                            if has_page2_rectangle:
-                                search_root = etree.Element("Page_2")
                                for p in page2_ptags:
-                                    search_root.append(p)
-                                items = search_root.xpath(xpath)
+                                    items.extend(p.xpath(xpath))
                                
                            else:
+                                # print(etree.tostring(root, pretty_print=True, encoding="unicode"))
+
+                                # 단계별 점검
+                                # print("1) PICTURE 태그 존재 여부:", root.xpath("//PICTURE"))
+                                # print("2) BINITEM 태그 존재 여부:", root.xpath("//BINITEM"))
+                                # print("3) BINITEM Format 값:", root.xpath("//BINITEM/@Format"))
+                                # print("4) BINITEM BinData 값:", root.xpath("//BINITEM/@BinData"))
+                                # print("5) IMAGE BinItem 값:", root.xpath("//IMAGE/@BinItem"))
+                                # print("6) PICTURE 안의 IMAGE 존재 여부:", root.xpath("//PICTURE/IMAGE"))
+                                # print("7) SIZE Width 값:", root.xpath("//PICTURE/SHAPEOBJECT/SIZE/@Width"))
+
+                                # 원래 쓰시던 XPath 그대로 결과 확인
+                                
+                                # print("8) 원래 XPath 결과:", root.xpath(xpath_expr))
                                items = root.xpath(xpath)
                            # 오차범위 설정
                            # 한글 프로그램 내부에서 드물게 0mm이지만 1pt로 저장되는 경우가 있음
@@ -613,12 +684,12 @@ class XMLScorer:
                                    
                        # 채점기준표 파일에 작성된 rgb값을 그대로 읽어와 HML파일 요소의 int형 rgb값과 비교
                        elif "Color" in (category or ""):
+                            items = []
+                            items2 = []
                            if has_page2_rectangle:
-                                search_root = etree.Element("Page_2")
                                for p in page2_ptags:
-                                    search_root.append(p)
-                                items = search_root.xpath(xpath) if xpath else []
-                                items2 = search_root.xpath(xpath2) if xpath2 else []
+                                    items.extend(p.xpath(xpath) if xpath else "")
+                                    items2.extend(p.xpath(xpath2) if xpath2 else "")
                                
                            else:      
                                items = root.xpath(xpath) if xpath else []
@@ -658,11 +729,8 @@ class XMLScorer:
                        
                        # 폰트명 
                        elif "FontName" in (category or ""):
-                            if has_page2_rectangle:
-                                search_root = etree.Element("Page_2")
-                                for p in page2_ptags:
-                                    search_root.append(p)
-                                charshape_list = search_root.xpath(xpath)
+                            if has_page2_rectangle: 
+                                charshape_list = rect_charshapes
                                
                            else:      
                                charshape_list = root.xpath(xpath)
@@ -712,6 +780,32 @@ class XMLScorer:
                                    score = points if any_match else 0
                                    self.evaluate_answer(scoring, matched_user_answer if any_match else "", right_answer, score)

+                        elif "FontSize" in (category or ""):
+                            if has_page2_rectangle:
+                                charshape_list = rect_charshapes
+                                
+                            else:
+                                charshape_list = root.xpath(xpath)
+                            
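+                            # Substitute each CharShape ID into the xpath and run it. NOTE(review): result_items is filled here, but the scoring loop below iterates items (the last ID's result only) - confirm intent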
+                            result_items = []
+                            if charshape_list:
+                                for charshape_id in charshape_list:
+                                    exec_xpath = xpath.replace('{rect_charshape_id}', charshape_id)
+                                    items = root.xpath(exec_xpath)
+                                    result_items.extend(items)
+                                    
+                            else:
+                                # No CharShape IDs were found: items becomes [None], so the loop below scores once with None as the user answer
+                                items = [None]
+                                                    
+                            for item in items:
+                                user_answer = item
+                                self.evaluate_answer(scoring, user_answer, right_answer, points)
+                                if scoring['points'] > 0:
+                                    break
+                            
+                                                            
                        # 폰트 속성
                        elif (category or "") == "FontAttribute":
                            # 하이퍼링크 처리
@@ -1378,8 +1472,8 @@ class XMLScorer:
 def main():

    # 시험회차 및 유형
-    # exam_round = '2508'
-    exam_round = '2522'
+    exam_round = '2508'
+    # exam_round = '2522'
    
    # 채점하고자 하는 유형은 주석 해제
    exam_types = [
@@ -1389,8 +1483,8 @@ def main():
        # 'D',
    ]

-    # test_mode = False
-    test_mode = True #/TEST 폴더 채점시 
+    test_mode = False
+    # test_mode = True #/TEST 폴더 채점시 
    
    output_excel_paths = []
    for exam_type in exam_types: