diff --git a/250909_DIW_2508C_TEST.xlsx b/250909_DIW_2508C_TEST.xlsx new file mode 100644 index 0000000..7297518 Binary files /dev/null and b/250909_DIW_2508C_TEST.xlsx differ diff --git a/DIW_2508C.json b/DIW_2508C.json index 3df41c9..ca67d29 100644 --- a/DIW_2508C.json +++ b/DIW_2508C.json @@ -351,89 +351,79 @@ "item": "② 다단 2단" }, "4": { - "path": "//RECTANGLE//CHAR[text()='{searchValue}']/ancestor::RECTANGLE/SHAPEOBJECT/SIZE/@Width", - "searchValue": "구강건강관리", + "path": "//RECTANGLE/SHAPEOBJECT/SIZE/@Width", "value": "60", "points": 2, - "category": "mmSize", + "category": "Rectangle.mmSize", "item": "문구 (구강건강관리)/① 크기-너비 (60 mm)" }, "5": { - "path": "//RECTANGLE//CHAR[text()='{searchValue}']/ancestor::RECTANGLE/SHAPEOBJECT/SIZE/@Height", - "searchValue": "구강건강관리", + "path": "//RECTANGLE/SHAPEOBJECT/SIZE/@Height", "value": "12", "points": 2, - "category": "mmSize", + "category": "Rectangle.mmSize", "item": "문구 (구강건강관리)/② 크기-높이 (12 mm)" }, "6": { - "path": "//RECTANGLE[.//CHAR[text()='{searchValue}']]//LINESHAPE", - "searchValue": "구강건강관리", + "path": "//RECTANGLE//LINESHAPE", "value": { "Style": "DoubleSlim", "Width": "283" }, "points": 2, - "category": "LineShape", + "category": "Rectangle.LineShape", "item": "문구 (구강건강관리)/③ 테두리 : 이중 실선(1.00mm)", "desc": "1mm = 283pt value['Width']에 pt값 입력" }, "7": { - "path": "//RECTANGLE[.//CHAR[text()='{searchValue}']]/@Ratio", - "searchValue": "구강건강관리", + "path": "//RECTANGLE/@Ratio", "value": "20", "points": 2, - "category": "OneAnswer", + "category": "Rectangle.OneAnswer", "item": "문구 (구강건강관리)/④ 글상자 모서리 (반원)", "desc": "모서리 비율 반원:50 / 둥근모양:20" }, "8": { - "path": "//RECTANGLE[.//CHAR[text()='{searchValue}']]//WINDOWBRUSH/@FaceColor", - "searchValue": "구강건강관리", + "path": "//RECTANGLE//WINDOWBRUSH/@FaceColor", "value": "187,140,209", "points": 2, - "category": "Color", + "category": "Rectangle.Color", "item": "문구 (구강건강관리)/⑤ 채우기 : 색상(RGB:187,140,209)" }, "9": { - "path": "//RECTANGLE[.//CHAR[text()='{searchValue}']]/SHAPEOBJECT/POSITION/@TreatAsChar", - "searchValue": "구강건강관리", + "path": "//RECTANGLE/SHAPEOBJECT/POSITION/@TreatAsChar", "value": "true", "points": 1, - "category": "OneAnswer", + "category": "Rectangle.OneAnswer", "item": "문구 (구강건강관리)/⑥ 글상자 위치 (글자처럼 취급)" }, "10": { - "path": "//PARASHAPE[@Id=//RECTANGLE//CHAR[text()='{searchValue}']/ancestor::P[last()]/@ParaShape]/@Align", - "searchValue": "구강건강관리", + "path": "//PARASHAPE[@Id='{rectangle_parashape_id}']/@Align", "value": "Center", "points": 1, - "category": "OneAnswer", + "category": "Rectangle.TextBoxAlign", "item": "문구 (구강건강관리)/⑦ 글상자 정렬 (가운데 정렬)" }, "11": { "path": "//TEXT[CHAR[text()='{searchValue}']]/@CharShape", - "searchValue": "구강건강관리", "value": "맑은 고딕", "points": 1, - "category": "FontName", + "category": "Rectangle.FontName", "item": "문구 (구강건강관리)/⑧ 글씨체 (맑은 고딕)" }, "12": { "path": "//CHARSHAPE[@Id=//RECTANGLE//TEXT[./CHAR[text()='{searchValue}']]/@CharShape]/@Height", - "searchValue": "구강건강관리", "value": "2300", "points": 1, - "category": "OneAnswer", + "category": "Rectangle.OneAnswer", "item": "문구 (구강건강관리)/⑨ 글씨크기 (2300)", "desc": "1pt당 100" }, "13": { "path": "//PARASHAPE[@Id=//RECTANGLE//P[.//CHAR[text()='{searchValue}']]/@ParaShape]/@Align", - "searchValue": "구강건강관리", "value": "Center", "points": 1, - "category": "OneAnswer", + "category": "Rectangle.OneAnswer", "item": "문구 (구강건강관리)/⑩ 정렬 (가운데 정렬)" }, "14": { diff --git a/diwScoring2.py b/diwScoring2.py index da7ff8d..a01c983 100644 --- a/diwScoring2.py +++ b/diwScoring2.py @@ -3,7 +3,7 @@ import difflib import json from pathlib import Path import os -from lxml import etree as ET +from lxml import etree import re from difflib import SequenceMatcher import pandas as pd @@ -133,32 +133,6 @@ class XMLScorer: # 하나의 XML 파일 채점 def _score_xml_file(self, xml_file, chart_xml): - # def parse_pages_by_bookmark(root): - # """ - # P/TEXT/BOOKMARK 구조를 가진 XML에서 페이지 구간별

요소를 파싱하여 반환 - # """ - # pages = {} - # all_p_tags = root.xpath('//P') - - # current_page = None - # page_start_index = None - - # for i, p in enumerate(all_p_tags): - # # BOOKMARK가 존재하는지 확인 (어디에 있든 탐색) - # bookmarks = p.xpath('.//BOOKMARK') - # for bm in bookmarks: - # name = bm.get('Name') - # if name and name.endswith('_start'): - # current_page = name.replace('_start', '') - # page_start_index = i - # elif name and name.endswith('_end') and current_page is not None: - # page_end_index = i - # page_content = all_p_tags[page_start_index:page_end_index + 1] - # pages[current_page] = page_content - # current_page = None - # page_start_index = None - - # return pages def parse_pages_by_bookmark(root): """ BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의

요소들을 @@ -201,7 +175,7 @@ class XMLScorer: return full_text try: - tree = ET.parse(xml_file) + tree = etree.parse(xml_file) root = tree.getroot() # XML문서 페이지 파싱 전처리 @@ -216,9 +190,9 @@ class XMLScorer: # 차트 XML 파일이 없는 경우 0점 채점을 위헤 빈 XML 생성 if chart_xml is None: - chart_tree = ET.fromstring('') + chart_tree = etree.fromstring('') else: - chart_tree = ET.fromstring(chart_xml) + chart_tree = etree.fromstring(chart_xml) # 결과값을 Dictionary로 저장 # 하나의 xml파일 = 수험생 한명의 답안지 @@ -275,6 +249,9 @@ class XMLScorer: } try: + # 머릿말과 관련된 문항에서 1페이지에 머릿말이 없는 경우의 처리 + # [1-25, 26, 27] 문항 'DIAT' 머릿말 채점시 1페이지에 머릿말이 없으면 + # 채점하지 않고 0점 처리 if "Header" in (category or ""): def has_elements(ptags, xpath): for p in ptags: @@ -285,9 +262,27 @@ class XMLScorer: page1_ptags = pages.get('Page_1', []) header_xpath = ".//HEADER//P" - has_page1_element = has_elements(page1_ptags, header_xpath) + has_page1_header = has_elements(page1_ptags, header_xpath) - if not has_page1_element: + if not has_page1_header: + user_answer = None + self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal") + continue + + has_page2_rectangle = False + if "Rectangle" in (category or ""): + def has_elements(ptags, xpath): + for p in ptags: + element_list = p.xpath(xpath) if xpath else [] + if element_list: + return True + return False + + page2_ptags = pages.get('Page_2', []) + rectangle_xpath = ".//RECTANGLE" + has_page2_rectangle = has_elements(page2_ptags, rectangle_xpath) + + if not has_page2_rectangle: user_answer = None self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal") continue @@ -458,13 +453,48 @@ class XMLScorer: self.evaluate_answer(scoring, user_answer, right_answer, points) if scoring['points'] > 0: break - + + # 글상자 정렬 [2-10] 문항 + # 2페이지의 글상자의 ParaShape ID를 동적으로 찾아서 채점 + elif "TextBoxAlign" in (category or ""): + if has_page2_rectangle: + # 2페이지 내에서만 검색 + search_root = etree.Element("Page_2") + for p in page2_ptags: + search_root.append(p) + rectangle_parashape_id = search_root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape") + else: + # 전체 root에서 검색 + rectangle_parashape_id = root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape") + + # ParaShape ID가 있는 경우에만 xpath 치환 & 실행 + if rectangle_parashape_id: + xpath = xpath.replace('{rectangle_parashape_id}', rectangle_parashape_id[0]) + items = root.xpath(xpath) + else: + # RECTANGLE이 없으면 items는 빈 리스트 + items = [None] + + for item in items: + user_answer = item + self.evaluate_answer(scoring, user_answer, right_answer, points) + if scoring['points'] > 0: + break + # 정답이 하나인 경우 # elif (category or "") in ["OneAnswer", "ChartOneAnswer"]: - elif "OneAnswer" in (category or ""): - items = root.xpath(xpath) if xpath else [] - items2 = root.xpath(xpath2) if xpath2 else [] + elif "OneAnswer" in (category or ""): + if has_page2_rectangle: + search_root = etree.Element("Page_2") + for p in page2_ptags: + search_root.append(p) + items = search_root.xpath(xpath) if xpath else [] + items2 = search_root.xpath(xpath2) if xpath2 else [] + + else: + items = root.xpath(xpath) if xpath else [] + items2 = root.xpath(xpath2) if xpath2 else [] # 차트 XML에서 정답을 찾는 경우 # 차트 종류가 @@ -496,8 +526,15 @@ class XMLScorer: break # [2-6] 테두리 이중실선 1.00mm - elif (category or "") == "LineShape": - line_shapes = root.xpath(xpath) if xpath else [] + elif "LineShape" in (category or ""): + if has_page2_rectangle: + search_root = etree.Element("Page_2") + for p in page2_ptags: + search_root.append(p) + line_shapes = search_root.xpath(xpath) if xpath else [] + + else: + line_shapes = root.xpath(xpath) if xpath else [] user_answer = { 'Style': None, @@ -516,8 +553,16 @@ class XMLScorer: break # 사용자 입력값이 mm단위인 경우 - elif (category or "") == "mmSize": - items = root.xpath(xpath) + # elif (category or "") == "mmSize": + elif "mmSize" in (category or ""): + if has_page2_rectangle: + search_root = etree.Element("Page_2") + for p in page2_ptags: + search_root.append(p) + items = search_root.xpath(xpath) + + else: + items = root.xpath(xpath) # 오차범위 설정 # 한글 프로그램 내부에서 드물게 0mm이지만 1pt로 저장되는 경우가 있음 # @@ -567,10 +612,18 @@ class XMLScorer: self.evaluate_answer(scoring, user_answer, right_answer, points) # 채점기준표 파일에 작성된 rgb값을 그대로 읽어와 HML파일 요소의 int형 rgb값과 비교 - elif (category or "") == "Color": - items = root.xpath(xpath) if xpath else [] - items2 = root.xpath(xpath2) if xpath2 else [] - + elif "Color" in (category or ""): + if has_page2_rectangle: + search_root = etree.Element("Page_2") + for p in page2_ptags: + search_root.append(p) + items = search_root.xpath(xpath) if xpath else [] + items2 = search_root.xpath(xpath2) if xpath2 else [] + + else: + items = root.xpath(xpath) if xpath else [] + items2 = root.xpath(xpath2) if xpath2 else [] + rgb_text = right_answer # 정규식을 이용해 숫자만 리스트로 추출 @@ -605,8 +658,15 @@ class XMLScorer: # 폰트명 elif "FontName" in (category or ""): - charshape_list = root.xpath(xpath) - + if has_page2_rectangle: + search_root = etree.Element("Page_2") + for p in page2_ptags: + search_root.append(p) + charshape_list = search_root.xpath(xpath) + + else: + charshape_list = root.xpath(xpath) + # 문자속성이 없는 경우 if not charshape_list: user_answer = "" @@ -978,7 +1038,7 @@ class XMLScorer: onePersonResult['total_score'] = self.total_score return onePersonResult - except ET.ParseError as e: + except etree.ParseError as e: return { 'filename': os.path.basename(xml_file), 'error': f"XML 파싱 오류: {str(e)}", @@ -986,7 +1046,7 @@ class XMLScorer: } def binary_to_chartxml(self, xml_path): - tree = ET.parse(xml_path) + tree = etree.parse(xml_path) root = tree.getroot() binary_data = root.xpath('//BINDATA[@Id=//BINITEM[@Format="OLE"]/@BinData]/text()') @@ -1038,8 +1098,8 @@ class XMLScorer: # 2. 공백제거, 특정 형식 제거 # 3. 리스트를 문자열로 변환 - user_answer_root = ET.parse(user_answer_file).getroot() - correct_answer_root = ET.parse(correct_answer_file).getroot() + user_answer_root = etree.parse(user_answer_file).getroot() + correct_answer_root = etree.parse(correct_answer_file).getroot() # xpath로 바이너리 부분추출 user_input_text = user_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]') @@ -1054,7 +1114,7 @@ class XMLScorer: # 차트 XML에서 차트제목 추출 if chart_xml is not None: - chart_xml_tree = ET.fromstring(chart_xml) + chart_xml_tree = etree.fromstring(chart_xml) ns = {'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart', 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'} xpath_expr = '/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t' @@ -1325,12 +1385,12 @@ def main(): exam_types = [ # 'A', # 'B', - # 'C', - 'D', + 'C', + # 'D', ] - test_mode = False - # test_mode = True #/TEST 폴더 채점시 + # test_mode = False + test_mode = True #/TEST 폴더 채점시 output_excel_paths = [] for exam_type in exam_types: diff --git a/zzz.xbook b/zzz.xbook index 0b62787..ccd3958 100644 --- a/zzz.xbook +++ b/zzz.xbook @@ -1 +1 @@ -[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//RECTANGLE//CHAR[text()='구강건강관거리']/ancestor::RECTANGLE/SHAPEOBJECT/SIZE/@Width"},{"kind":2,"language":"xpath","value":"//CHAR[contains(text(),'예방')][contains(text(),'豫防')]"},{"kind":2,"language":"xpath","value":"//TEXT[CHAR[text()='DIAT']]"},{"kind":2,"language":"xpath","value":"//HEADER//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHAR[contains(string(.), '※')]/descendant-or-self::text()"},{"kind":2,"language":"xpath","value":"//P[@ParaShape=\"17\"]/TEXT[@CharShape='7']//CHAR[string(.)]"},{"kind":2,"language":"xpath","value":"//CHAR[contains(string(.), '기타')]/text()"}] \ No newline at end of file +[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//PARASHAPE[@Id=//RECTANGLE/ancestor::P[last()]/@ParaShape]/@Align"},{"kind":2,"language":"xpath","value":"//RECTANGLE//LINESHAPE"},{"kind":2,"language":"xpath","value":"//RECTANGLE/SHAPEOBJECT/SIZE/@Width"},{"kind":2,"language":"xpath","value":"//HEADER//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHAR[contains(string(.), '※')]/descendant-or-self::text()"},{"kind":2,"language":"xpath","value":"//P[@ParaShape=\"17\"]/TEXT[@CharShape='7']//CHAR[string(.)]"},{"kind":2,"language":"xpath","value":"//CHAR[contains(string(.), '기타')]/text()"}] \ No newline at end of file