diff --git a/250909_DIW_2508C_TEST.xlsx b/250909_DIW_2508C_TEST.xlsx deleted file mode 100644 index 7297518..0000000 Binary files a/250909_DIW_2508C_TEST.xlsx and /dev/null differ diff --git a/250912_DIW_2508C_TEST.xlsx b/250912_DIW_2508C_TEST.xlsx new file mode 100644 index 0000000..19e9b5a Binary files /dev/null and b/250912_DIW_2508C_TEST.xlsx differ diff --git a/250912_DIW_2508C_채점결과.xlsx b/250912_DIW_2508C_채점결과.xlsx new file mode 100644 index 0000000..5250643 Binary files /dev/null and b/250912_DIW_2508C_채점결과.xlsx differ diff --git a/DIW_2508C.json b/DIW_2508C.json index ca67d29..6221332 100644 --- a/DIW_2508C.json +++ b/DIW_2508C.json @@ -344,7 +344,7 @@ "desc": "섹션이 1개 이상이면 점수부여" }, "3": { - "path": "TEXT/COLDEF/@Count", + "path": "./TEXT/COLDEF/@Count", "value": "2", "points": 3, "category": "TwoColumn", @@ -398,32 +398,32 @@ "item": "문구 (구강건강관리)/⑥ 글상자 위치 (글자처럼 취급)" }, "10": { - "path": "//PARASHAPE[@Id='{rectangle_parashape_id}']/@Align", + "path": "//PARASHAPE[@Id='{rect_parashape_id}']/@Align", "value": "Center", "points": 1, "category": "Rectangle.TextBoxAlign", "item": "문구 (구강건강관리)/⑦ 글상자 정렬 (가운데 정렬)" }, "11": { - "path": "//TEXT[CHAR[text()='{searchValue}']]/@CharShape", + "path": ".//RECTANGLE//TEXT/@CharShape", "value": "맑은 고딕", "points": 1, "category": "Rectangle.FontName", "item": "문구 (구강건강관리)/⑧ 글씨체 (맑은 고딕)" }, "12": { - "path": "//CHARSHAPE[@Id=//RECTANGLE//TEXT[./CHAR[text()='{searchValue}']]/@CharShape]/@Height", + "path": "//CHARSHAPE[@Id='{rect_charshape_id}']/@Height", "value": "2300", "points": 1, - "category": "Rectangle.OneAnswer", + "category": "Rectangle.FontSize", "item": "문구 (구강건강관리)/⑨ 글씨크기 (2300)", "desc": "1pt당 100" }, "13": { - "path": "//PARASHAPE[@Id=//RECTANGLE//P[.//CHAR[text()='{searchValue}']]/@ParaShape]/@Align", + "path": "//PARASHAPE[@Id={rect_parashape_id}]/@Align", "value": "Center", "points": 1, - "category": "Rectangle.OneAnswer", + "category": "Rectangle.TextBoxAlign", "item": "문구 (구강건강관리)/⑩ 정렬 (가운데 정렬)" }, "14": { diff --git a/diwScoring2.py b/diwScoring2.py index e695604..b78e1e7 100644 --- a/diwScoring2.py +++ b/diwScoring2.py @@ -135,32 +135,77 @@ class XMLScorer: def _score_xml_file(self, xml_file, chart_xml): def parse_pages_by_bookmark(root): """ - BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의
요소들을 - 페이지 단위로 딕셔너리에 저장 + BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의 최상위 블록 요소들을 + 페이지 단위로 딕셔너리에 저장하여 XML 구조를 유지합니다. + + Args: + root: lxml etree의 루트 요소 + + Returns: + dict: 페이지 이름을 키로, 해당 페이지의 lxml 요소 리스트를 값으로 하는 딕셔너리 """ pages = {} - all_p_tags = root.xpath('//P') + + # 1. 'SECTION'의 직계 자식들만 가져와 평탄화를 방지합니다. + # 이것들이 문서의 최상위 구조를 이루는 요소들입니다 (P, TABLE 등). + all_top_blocks = root.xpath('//SECTION/*') current_page = None page_start_index = None - for i, p in enumerate(all_p_tags): - # P 안의 모든 BOOKMARK 탐색 - bookmarks = p.xpath('.//BOOKMARK') - for bm in bookmarks: - name = bm.get('Name') - if name and name.endswith('_start'): - current_page = name.replace('_start', '') - page_start_index = i - elif name and name.endswith('_end') and current_page is not None: - page_end_index = i - # 시작~끝까지 P 태그 묶음 저장 - page_content = all_p_tags[page_start_index:page_end_index + 1] - pages[current_page] = page_content - current_page = None - page_start_index = None + # 2. 이 최상위 블록 리스트를 순회합니다. + for i, block in enumerate(all_top_blocks): + # 3. 현재 블록 '내부'에서 시작 북마크를 탐색합니다. ('.//' 사용) + start_bookmarks = block.xpath('.//BOOKMARK[contains(@Name, "_start")]') + if start_bookmarks: + # 여러 북마크가 있을 수 있으니 첫 번째 것을 기준으로 합니다. + name = start_bookmarks[0].get('Name') + current_page = name.replace('_start', '') + page_start_index = i + # 4. 현재 페이지를 찾는 중이고, 끝 북마크를 찾았을 경우 + # f-string을 사용하여 정확한 끝 북마크 이름을 찾습니다. + if current_page is not None and block.xpath(f'.//BOOKMARK[@Name="{current_page}_end"]'): + page_end_index = i + + # 5. 시작 인덱스부터 끝 인덱스까지 'all_top_blocks' 리스트를 슬라이싱합니다. + # 이 슬라이스에는 중첩 구조가 온전히 보존된 최상위 요소들이 담깁니다. + page_content = all_top_blocks[page_start_index : page_end_index + 1] + pages[current_page] = page_content + + # 상태 초기화 + current_page = None + page_start_index = None + return pages + # def parse_pages_by_bookmark(root): + # """ + # BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의
요소들을 + # 페이지 단위로 딕셔너리에 저장 + # """ + # pages = {} + # all_p_tags = root.xpath('//P') + + # current_page = None + # page_start_index = None + + # for i, p in enumerate(all_p_tags): + # # P 안의 모든 BOOKMARK 탐색 + # bookmarks = p.xpath('.//BOOKMARK') + # for bm in bookmarks: + # name = bm.get('Name') + # if name and name.endswith('_start'): + # current_page = name.replace('_start', '') + # page_start_index = i + # elif name and name.endswith('_end') and current_page is not None: + # page_end_index = i + # # 시작~끝까지 P 태그 묶음 저장 + # page_content = all_p_tags[page_start_index:page_end_index + 1] + # pages[current_page] = page_content + # current_page = None + # page_start_index = None + + # return pages def extract_char_text_from_p(p_element): """ @@ -249,6 +294,30 @@ class XMLScorer: } try: + has_page2_rectangle = False + if "Rectangle" in (category or ""): + def has_elements(ptags, xpath): + for p in ptags: + element_list = p.xpath(xpath) if xpath else [] + if element_list: + return True + return False + + page2_ptags = pages.get('Page_2', []) + rectangle_xpath = ".//RECTANGLE" + has_page2_rectangle = has_elements(page2_ptags, rectangle_xpath) + + if not has_page2_rectangle: + user_answer = None + self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal") + continue + else: + rect_charshapes = [] + rect_parashapes = [] + for p in page2_ptags: + rect_charshapes.extend(p.xpath(".//RECTANGLE//TEXT/@CharShape")) + rect_parashapes.extend(p.xpath(".//RECTANGLE//P/@ParaShape")) + # 머릿말과 관련된 문항에서 1페이지에 머릿말이 없는 경우의 처리 # [1-25, 26, 27] 문항 'DIAT' 머릿말 채점시 1페이지에 머릿말이 없으면 # 채점하지 않고 0점 처리 @@ -269,24 +338,6 @@ class XMLScorer: self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal") continue - has_page2_rectangle = False - if "Rectangle" in (category or ""): - def has_elements(ptags, xpath): - for p in ptags: - element_list = p.xpath(xpath) if xpath else [] - if element_list: - return True - return False - - page2_ptags = pages.get('Page_2', []) - rectangle_xpath = ".//RECTANGLE" - has_page2_rectangle = has_elements(page2_ptags, rectangle_xpath) - - if not has_page2_rectangle: - user_answer = None - self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal") - continue - if (category or "") == "PageSetting": items = root.xpath(xpath) error_range = criterion.get('tolerance', 0) @@ -458,20 +509,19 @@ class XMLScorer: # 2페이지의 글상자의 ParaShape ID를 동적으로 찾아서 채점 elif "TextBoxAlign" in (category or ""): if has_page2_rectangle: - # 2페이지 내에서만 검색 - search_root = etree.Element("Page_2") - for p in page2_ptags: - search_root.append(p) - rectangle_parashape_id = search_root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape") - + parashape_list = rect_parashapes + else: - # 전체 root에서 검색 - rectangle_parashape_id = root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape") + parashape_list = root.xpath(xpath) - # ParaShape ID가 있는 경우에만 xpath 치환 & 실행 - if rectangle_parashape_id: - xpath = xpath.replace('{rectangle_parashape_id}', rectangle_parashape_id[0]) - items = root.xpath(xpath) + # parashape ID가 있는 경우에만 xpath 치환 & 실행 + result_items = [] + if parashape_list: + for parashape_id in parashape_list: + exec_xpath = xpath.replace('{rect_parashape_id}', parashape_id) + items = root.xpath(exec_xpath) + result_items.extend(items) + else: # RECTANGLE이 없으면 items는 빈 리스트 items = [None] @@ -482,16 +532,26 @@ class XMLScorer: if scoring['points'] > 0: break + # if has_page2_rectangle: + # # 2페이지 내에서만 검색 + # search_root = etree.Element("Page_2") + # for p in page2_ptags: + # search_root.append(p) + # rect_parashape_id = search_root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape") + + # else: + # # 전체 root에서 검색 + # rect_parashape_id = root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape") + # 정답이 하나인 경우 # elif (category or "") in ["OneAnswer", "ChartOneAnswer"]: - elif "OneAnswer" in (category or ""): + elif "OneAnswer" in (category or ""): + items = [] + items2 = [] if has_page2_rectangle: - search_root = etree.Element("Page_2") for p in page2_ptags: - search_root.append(p) - items = search_root.xpath(xpath) if xpath else [] - items2 = search_root.xpath(xpath2) if xpath2 else [] - + items.extend(p.xpath(xpath) if xpath else "") + items2.extend(p.xpath(xpath2) if xpath2 else "") else: items = root.xpath(xpath) if xpath else [] items2 = root.xpath(xpath2) if xpath2 else [] @@ -527,12 +587,10 @@ class XMLScorer: # [2-6] 테두리 이중실선 1.00mm elif "LineShape" in (category or ""): + line_shapes = [] if has_page2_rectangle: - search_root = etree.Element("Page_2") for p in page2_ptags: - search_root.append(p) - line_shapes = search_root.xpath(xpath) if xpath else [] - + line_shapes.extend(p.xpath(xpath) if xpath else []) else: line_shapes = root.xpath(xpath) if xpath else [] @@ -555,13 +613,26 @@ class XMLScorer: # 사용자 입력값이 mm단위인 경우 # elif (category or "") == "mmSize": elif "mmSize" in (category or ""): + items = [] if has_page2_rectangle: - search_root = etree.Element("Page_2") for p in page2_ptags: - search_root.append(p) - items = search_root.xpath(xpath) + items.extend(p.xpath(xpath)) else: + # print(etree.tostring(root, pretty_print=True, encoding="unicode")) + + # 단계별 점검 + # print("1) PICTURE 태그 존재 여부:", root.xpath("//PICTURE")) + # print("2) BINITEM 태그 존재 여부:", root.xpath("//BINITEM")) + # print("3) BINITEM Format 값:", root.xpath("//BINITEM/@Format")) + # print("4) BINITEM BinData 값:", root.xpath("//BINITEM/@BinData")) + # print("5) IMAGE BinItem 값:", root.xpath("//IMAGE/@BinItem")) + # print("6) PICTURE 안의 IMAGE 존재 여부:", root.xpath("//PICTURE/IMAGE")) + # print("7) SIZE Width 값:", root.xpath("//PICTURE/SHAPEOBJECT/SIZE/@Width")) + + # 원래 쓰시던 XPath 그대로 결과 확인 + + # print("8) 원래 XPath 결과:", root.xpath(xpath_expr)) items = root.xpath(xpath) # 오차범위 설정 # 한글 프로그램 내부에서 드물게 0mm이지만 1pt로 저장되는 경우가 있음 @@ -613,12 +684,12 @@ class XMLScorer: # 채점기준표 파일에 작성된 rgb값을 그대로 읽어와 HML파일 요소의 int형 rgb값과 비교 elif "Color" in (category or ""): + items = [] + items2 = [] if has_page2_rectangle: - search_root = etree.Element("Page_2") for p in page2_ptags: - search_root.append(p) - items = search_root.xpath(xpath) if xpath else [] - items2 = search_root.xpath(xpath2) if xpath2 else [] + items.extend(p.xpath(xpath) if xpath else "") + items2.extend(p.xpath(xpath2) if xpath2 else "") else: items = root.xpath(xpath) if xpath else [] @@ -658,11 +729,8 @@ class XMLScorer: # 폰트명 elif "FontName" in (category or ""): - if has_page2_rectangle: - search_root = etree.Element("Page_2") - for p in page2_ptags: - search_root.append(p) - charshape_list = search_root.xpath(xpath) + if has_page2_rectangle: + charshape_list = rect_charshapes else: charshape_list = root.xpath(xpath) @@ -712,6 +780,32 @@ class XMLScorer: score = points if any_match else 0 self.evaluate_answer(scoring, matched_user_answer if any_match else "", right_answer, score) + elif "FontSize" in (category or ""): + if has_page2_rectangle: + charshape_list = rect_charshapes + + else: + charshape_list = root.xpath(xpath) + + # CharShape ID가 있는 경우에만 xpath 치환 & 실행 + result_items = [] + if charshape_list: + for charshape_id in charshape_list: + exec_xpath = xpath.replace('{rect_charshape_id}', charshape_id) + items = root.xpath(exec_xpath) + result_items.extend(items) + + else: + # RECTANGLE이 없으면 items는 빈 리스트 + items = [None] + + for item in items: + user_answer = item + self.evaluate_answer(scoring, user_answer, right_answer, points) + if scoring['points'] > 0: + break + + # 폰트 속성 elif (category or "") == "FontAttribute": # 하이퍼링크 처리 @@ -1378,8 +1472,8 @@ class XMLScorer: def main(): # 시험회차 및 유형 - # exam_round = '2508' - exam_round = '2522' + exam_round = '2508' + # exam_round = '2522' # 채점하고자 하는 유형은 주석 해제 exam_types = [ @@ -1389,8 +1483,8 @@ def main(): # 'D', ] - # test_mode = False - test_mode = True #/TEST 폴더 채점시 + test_mode = False + # test_mode = True #/TEST 폴더 채점시 output_excel_paths = [] for exam_type in exam_types: diff --git a/zzz.xbook b/zzz.xbook index ccd3958..695580d 100644 --- a/zzz.xbook +++ b/zzz.xbook @@ -1 +1 @@ -[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//PARASHAPE[@Id=//RECTANGLE/ancestor::P[last()]/@ParaShape]/@Align"},{"kind":2,"language":"xpath","value":"//RECTANGLE//LINESHAPE"},{"kind":2,"language":"xpath","value":"//RECTANGLE/SHAPEOBJECT/SIZE/@Width"},{"kind":2,"language":"xpath","value":"//HEADER//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHAR[contains(string(.), '※')]/descendant-or-self::text()"},{"kind":2,"language":"xpath","value":"//P[@ParaShape=\"17\"]/TEXT[@CharShape='7']//CHAR[string(.)]"},{"kind":2,"language":"xpath","value":"//CHAR[contains(string(.), '기타')]/text()"}] \ No newline at end of file +[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//BINITEM[@BinData=//PICTURE/IMAGE/@BinItem][@Format='JPG' or @Format='JPEG']"},{"kind":2,"language":"xpath","value":".//RECTANGLE//TEXT/@CharShape"},{"kind":2,"language":"xpath","value":"//RECTANGLE/SHAPEOBJECT/SIZE/@Width"},{"kind":2,"language":"xpath","value":"//HEADER//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHAR[contains(string(.), '※')]/descendant-or-self::text()"},{"kind":2,"language":"xpath","value":"//P[@ParaShape=\"17\"]/TEXT[@CharShape='7']//CHAR[string(.)]"},{"kind":2,"language":"xpath","value":"//CHAR[contains(string(.), '기타')]/text()"}] \ No newline at end of file