diff --git a/250627_DIW_2505A_TEST.xlsx b/250627_DIW_2505A_TEST.xlsx new file mode 100644 index 0000000..9b09b88 Binary files /dev/null and b/250627_DIW_2505A_TEST.xlsx differ diff --git a/DIW_2505A.json b/DIW_2505A.json index f17b345..7f947f9 100644 --- a/DIW_2505A.json +++ b/DIW_2505A.json @@ -342,7 +342,7 @@ "desc": "섹션이 1개 이상이면 점수부여" }, "3": { - "path": "//COLDEF", + "path": "TEXT/COLDEF/@Count", "value": "2", "points": 3, "category": "TwoColumn", diff --git a/diwScoring2.py b/diwScoring2.py index cf58c98..a31f4af 100644 --- a/diwScoring2.py +++ b/diwScoring2.py @@ -127,6 +127,33 @@ class XMLScorer: # 하나의 XML 파일 채점 def _score_xml_file(self, xml_file, chart_xml): + def parse_pages_by_bookmark(root): + """ + P/TEXT/BOOKMARK 구조를 가진 XML에서 페이지 구간별
요소를 파싱하여 반환 + """ + pages = {} + all_p_tags = root.xpath('//P') + + current_page = None + page_start_index = None + + for i, p in enumerate(all_p_tags): + # BOOKMARK가 존재하는지 확인 + bookmark = p.xpath('./TEXT/BOOKMARK') + if bookmark: + name = bookmark[0].get('Name') + if name and name.endswith('_start'): + current_page = name.replace('_start', '') + page_start_index = i + elif name and name.endswith('_end') and current_page is not None: + page_end_index = i + page_content = all_p_tags[page_start_index:page_end_index + 1] + pages[current_page] = page_content + current_page = None + page_start_index = None + + return pages + def extract_char_text_from_p(p_element): """ 주어진
요소에서 모든 자손 요소 가져오기
- p_elements = root.xpath('//SECTION/P')
-
- # PageBreak='true' 속성을 가진 P태그 인덱스
- # [=쪽나눔 이후 페이지의 첫 문단들]
- pagebreak_index_list = []
- for i, p in enumerate(p_elements):
- xml_index = i + 1
- if p.get("PageBreak") == "true":
- pagebreak_index_list.append(xml_index)
-
- # 페이지 별 시작 문단~끝 문단 구간 저장
- page_ranges = []
- start = 1 # XML은 1-based index
- # pagebreak_index_list에 따라 구간 나누기
- for index in pagebreak_index_list:
- end = index - 1
- page_ranges.append((start, end))
- start = index
-
- # 마지막 페이지 구간 추가
- page_ranges.append((start, len(p_elements))) # 끝까지
-
- # 출력 확인
- # for i, (start, end) in enumerate(page_ranges, 1):
- # print(f"📄 Page {i}: {start} ~ {end}")
+ for p in page2_ptags:
+ column_count = p.xpath(xpath)
+ user_answer = column_count[0] if column_count else '0'
- # 단수 구간 결과를 저장할 리스트
- column_sections = []
-
- current_count = None
- start_index = None
-
- # 모든 P태그를 순회하며 단 나눔이 1단인 구간과 2단인 구간을 저장
- for i, p in enumerate(p_elements):
- xml_index = i + 1 # XML 기준 1-based index
- coldef = p.xpath('.//COLDEF')
-
- if coldef:
- # 다단 수(2단)
- column_count = coldef[0].get('Count')
-
- # 첫 번째 Count 발견 시 시작점 설정
- if current_count is None:
- current_count = column_count
- start_index = i
-
- # Count 값이 변경되었을 때 이전 구간을 저장
- elif column_count != current_count:
- column_sections.append((start_index, i - 1, current_count))
- # 새 구간 시작
- current_count = column_count
- start_index = i
-
- # 마지막 구간 저장
- if current_count is not None and start_index is not None:
- column_sections.append((start_index, len(p_elements) - 1, current_count))
-
- # 결과 출력
- # for start, end, count in column_sections:
- # xml_start = start + 1 # XML 기준 1-based index
- # xml_end = end + 1
- # print(f"📄 {count}단 구간: P[{xml_start}] ~ P[{xml_end}]")
-
- # 2페이지 구간 가져오기 (인덱스는 0-based지만 값은 1-based)
- if len(page_ranges) > 1:
- second_page_start, second_page_end = page_ranges[1]
-
- # 2페이지가 없을 경우 1페이지(문서 전체) 내용으로 대체
- # 문서 전체에서 2단 문단이 있을 경우는 정답
- else:
- second_page_start, second_page_end = page_ranges[0]
- # 2페이지가 없을 경우 오답 처리
- # else:
- # user_answer = None
-
- # 2단 포함 여부 확인 변수
- has_two_column_in_page2 = False
-
- # 2단 구간이 2페이지 범위와 겹치는지 확인
- # col_start : 다단 시작 P태그 인덱스
- # col_end : 다단 끝 P태그 인덱스
- # col_count : 다단 수
- for col_start, col_end, col_count in column_sections:
- two_col_start = col_start + 1 # 1-based
- two_col_end = col_end + 1
-
- if col_count == '2':
- # 구간이 겹치는지 확인
- if two_col_end >= second_page_start and two_col_start <= second_page_end:
- has_two_column_in_page2 = True
- user_answer = col_count
- break
-
- # print("✅ 2페이지에 2단 있음" if has_two_column_in_page2 else "❌ 2페이지에 2단 없음")
-
- if has_two_column_in_page2:
+ if user_answer == right_answer:
self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
-
- # SECTION[2]가 존재하는 경우
- else: # has_section2
- coldef_in_section2 = has_section2[0].xpath('//COLDEF')
- has_correct_column_count = False
-
- for coldef in coldef_in_section2:
- column_count = coldef.get('Count')
- user_answer = column_count
- if user_answer == right_answer:
- has_correct_column_count = True
+
+ # P태그들 중 하나라도 다단이 존재할 경우 정답처리
+ if scoring['points'] > 0:
break
-
- if has_correct_column_count:
- self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
# 한자
elif (category or "") == "Hanja":
@@ -1031,17 +940,21 @@ class XMLScorer:
user_table_text = user_answer_root.xpath('//TABLE//CHAR//text()')
correct_table_text = correct_answer_root.xpath('//TABLE//CHAR//text()')
+ user_chart_title = ""
+ correct_chart_title = self.scoring_criteria["2"]["50"]["searchValue"]
+
# 차트 XML에서 차트제목 추출
if chart_xml is not None:
chart_xml_tree = ET.fromstring(chart_xml)
-
+ ns = {'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart',
+ 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'}
+ xpath_expr = '/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t'
+
# 차트 제목 추출
- chart_title = chart_xml_tree.xpath('/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t', namespaces={'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart', 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'})
+ chart_title = chart_xml_tree.xpath(xpath_expr, namespaces=ns)
# 차트 제목이 존재하는 경우
- if chart_title:
- user_chart_title = chart_title[0].text
- correct_chart_title = self.scoring_criteria["2"]["50"]["searchValue"]
+ user_chart_title = chart_title[0].text if chart_title else ""
try :
ignore_word = self.scoring_criteria["2"]["29"]["ignoreWord"]
@@ -1296,8 +1209,8 @@ class XMLScorer:
def main():
# 시험회차 및 유형
- # exam_round = '2505'
- exam_round = '2506_3'
+ exam_round = '2505'
+ # exam_round = '2506_3'
# 채점하고자 하는 유형은 주석 해제
exam_types = [
diff --git a/zzz.xbook b/zzz.xbook
index c134c20..66bf55a 100644
--- a/zzz.xbook
+++ b/zzz.xbook
@@ -1 +1 @@
-[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"/HWPML/BODY/SECTION/P[19]"},{"kind":2,"language":"xpath","value":"//SECTION"},{"kind":2,"language":"xpath","value":"//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHARSHAPE[@Id=//TEXT[CHAR[text()='지']]/@CharShape]/@Height"},{"kind":2,"language":"xpath","value":"//TABLE//CHAR//text()"}]
\ No newline at end of file
+[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"/HWPML/BODY/SECTION/P[19]"},{"kind":2,"language":"xpath","value":"//SECTION"},{"kind":2,"language":"xpath","value":"//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//P//COLDEF/@Count"},{"kind":2,"language":"xpath","value":"//P[.//BOOKMARK/@Name=\"Page_2_start\"]"}]
\ No newline at end of file