글상자 채점 방식 수정 [2-10] 까지 진행중

This commit is contained in:
2025-09-09 17:58:09 +09:00
parent 4c5854e8be
commit d4781a350d
4 changed files with 133 additions and 83 deletions

View File

@@ -3,7 +3,7 @@ import difflib
import json
from pathlib import Path
import os
from lxml import etree as ET
from lxml import etree
import re
from difflib import SequenceMatcher
import pandas as pd
@@ -133,32 +133,6 @@ class XMLScorer:
# 하나의 XML 파일 채점
def _score_xml_file(self, xml_file, chart_xml):
# def parse_pages_by_bookmark(root):
# """
# P/TEXT/BOOKMARK 구조를 가진 XML에서 페이지 구간별 <P> 요소를 파싱하여 반환
# """
# pages = {}
# all_p_tags = root.xpath('//P')
# current_page = None
# page_start_index = None
# for i, p in enumerate(all_p_tags):
# # BOOKMARK가 존재하는지 확인 (어디에 있든 탐색)
# bookmarks = p.xpath('.//BOOKMARK')
# for bm in bookmarks:
# name = bm.get('Name')
# if name and name.endswith('_start'):
# current_page = name.replace('_start', '')
# page_start_index = i
# elif name and name.endswith('_end') and current_page is not None:
# page_end_index = i
# page_content = all_p_tags[page_start_index:page_end_index + 1]
# pages[current_page] = page_content
# current_page = None
# page_start_index = None
# return pages
def parse_pages_by_bookmark(root):
"""
BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의 <P> 요소들을
@@ -201,7 +175,7 @@ class XMLScorer:
return full_text
try:
tree = ET.parse(xml_file)
tree = etree.parse(xml_file)
root = tree.getroot()
# XML문서 페이지 파싱 전처리
@@ -216,9 +190,9 @@ class XMLScorer:
# 차트 XML 파일이 없는 경우 0점 채점을 위해 빈 XML 생성
if chart_xml is None:
chart_tree = ET.fromstring('<xml></xml>')
chart_tree = etree.fromstring('<xml></xml>')
else:
chart_tree = ET.fromstring(chart_xml)
chart_tree = etree.fromstring(chart_xml)
# 결과값을 Dictionary로 저장
# 하나의 xml파일 = 수험생 한명의 답안지
@@ -275,6 +249,9 @@ class XMLScorer:
}
try:
# 머리말과 관련된 문항에서 1페이지에 머리말이 없는 경우의 처리
# [1-25, 26, 27] 문항 'DIAT' 머리말 채점 시 1페이지에 머리말이 없으면
# 채점하지 않고 0점 처리
if "Header" in (category or ""):
def has_elements(ptags, xpath):
for p in ptags:
@@ -285,9 +262,27 @@ class XMLScorer:
page1_ptags = pages.get('Page_1', [])
header_xpath = ".//HEADER//P"
has_page1_element = has_elements(page1_ptags, header_xpath)
has_page1_header = has_elements(page1_ptags, header_xpath)
if not has_page1_element:
if not has_page1_header:
user_answer = None
self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
continue
has_page2_rectangle = False
if "Rectangle" in (category or ""):
def has_elements(ptags, xpath):
for p in ptags:
element_list = p.xpath(xpath) if xpath else []
if element_list:
return True
return False
page2_ptags = pages.get('Page_2', [])
rectangle_xpath = ".//RECTANGLE"
has_page2_rectangle = has_elements(page2_ptags, rectangle_xpath)
if not has_page2_rectangle:
user_answer = None
self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
continue
@@ -458,13 +453,48 @@ class XMLScorer:
self.evaluate_answer(scoring, user_answer, right_answer, points)
if scoring['points'] > 0:
break
# 글상자 정렬 [2-10] 문항
# 2페이지의 글상자의 ParaShape ID를 동적으로 찾아서 채점
elif "TextBoxAlign" in (category or ""):
if has_page2_rectangle:
# 2페이지 내에서만 검색
search_root = etree.Element("Page_2")
for p in page2_ptags:
search_root.append(p)
rectangle_parashape_id = search_root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape")
else:
# 전체 root에서 검색
rectangle_parashape_id = root.xpath(".//RECTANGLE/ancestor::P[last()]/@ParaShape")
# ParaShape ID가 있는 경우에만 xpath 치환 & 실행
if rectangle_parashape_id:
xpath = xpath.replace('{rectangle_parashape_id}', rectangle_parashape_id[0])
items = root.xpath(xpath)
else:
# RECTANGLE이 없으면 items는 [None] (user_answer=None으로 한 번만 채점)
items = [None]
for item in items:
user_answer = item
self.evaluate_answer(scoring, user_answer, right_answer, points)
if scoring['points'] > 0:
break
# 정답이 하나인 경우
# elif (category or "") in ["OneAnswer", "ChartOneAnswer"]:
elif "OneAnswer" in (category or ""):
items = root.xpath(xpath) if xpath else []
items2 = root.xpath(xpath2) if xpath2 else []
elif "OneAnswer" in (category or ""):
if has_page2_rectangle:
search_root = etree.Element("Page_2")
for p in page2_ptags:
search_root.append(p)
items = search_root.xpath(xpath) if xpath else []
items2 = search_root.xpath(xpath2) if xpath2 else []
else:
items = root.xpath(xpath) if xpath else []
items2 = root.xpath(xpath2) if xpath2 else []
# 차트 XML에서 정답을 찾는 경우
# 차트 종류가
@@ -496,8 +526,15 @@ class XMLScorer:
break
# [2-6] 테두리 이중실선 1.00mm
elif (category or "") == "LineShape":
line_shapes = root.xpath(xpath) if xpath else []
elif "LineShape" in (category or ""):
if has_page2_rectangle:
search_root = etree.Element("Page_2")
for p in page2_ptags:
search_root.append(p)
line_shapes = search_root.xpath(xpath) if xpath else []
else:
line_shapes = root.xpath(xpath) if xpath else []
user_answer = {
'Style': None,
@@ -516,8 +553,16 @@ class XMLScorer:
break
# 사용자 입력값이 mm단위인 경우
elif (category or "") == "mmSize":
items = root.xpath(xpath)
# elif (category or "") == "mmSize":
elif "mmSize" in (category or ""):
if has_page2_rectangle:
search_root = etree.Element("Page_2")
for p in page2_ptags:
search_root.append(p)
items = search_root.xpath(xpath)
else:
items = root.xpath(xpath)
# 오차범위 설정
# 한글 프로그램 내부에서 드물게 0mm이지만 1pt로 저장되는 경우가 있음
#
@@ -567,10 +612,18 @@ class XMLScorer:
self.evaluate_answer(scoring, user_answer, right_answer, points)
# 채점기준표 파일에 작성된 rgb값을 그대로 읽어와 HML파일 요소의 int형 rgb값과 비교
elif (category or "") == "Color":
items = root.xpath(xpath) if xpath else []
items2 = root.xpath(xpath2) if xpath2 else []
elif "Color" in (category or ""):
if has_page2_rectangle:
search_root = etree.Element("Page_2")
for p in page2_ptags:
search_root.append(p)
items = search_root.xpath(xpath) if xpath else []
items2 = search_root.xpath(xpath2) if xpath2 else []
else:
items = root.xpath(xpath) if xpath else []
items2 = root.xpath(xpath2) if xpath2 else []
rgb_text = right_answer
# 정규식을 이용해 숫자만 리스트로 추출
@@ -605,8 +658,15 @@ class XMLScorer:
# 폰트명
elif "FontName" in (category or ""):
charshape_list = root.xpath(xpath)
if has_page2_rectangle:
search_root = etree.Element("Page_2")
for p in page2_ptags:
search_root.append(p)
charshape_list = search_root.xpath(xpath)
else:
charshape_list = root.xpath(xpath)
# 문자속성이 없는 경우
if not charshape_list:
user_answer = ""
@@ -978,7 +1038,7 @@ class XMLScorer:
onePersonResult['total_score'] = self.total_score
return onePersonResult
except ET.ParseError as e:
except etree.ParseError as e:
return {
'filename': os.path.basename(xml_file),
'error': f"XML 파싱 오류: {str(e)}",
@@ -986,7 +1046,7 @@ class XMLScorer:
}
def binary_to_chartxml(self, xml_path):
tree = ET.parse(xml_path)
tree = etree.parse(xml_path)
root = tree.getroot()
binary_data = root.xpath('//BINDATA[@Id=//BINITEM[@Format="OLE"]/@BinData]/text()')
@@ -1038,8 +1098,8 @@ class XMLScorer:
# 2. 공백제거, 특정 형식 제거
# 3. 리스트를 문자열로 변환
user_answer_root = ET.parse(user_answer_file).getroot()
correct_answer_root = ET.parse(correct_answer_file).getroot()
user_answer_root = etree.parse(user_answer_file).getroot()
correct_answer_root = etree.parse(correct_answer_file).getroot()
# xpath로 바이너리 부분추출
user_input_text = user_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
@@ -1054,7 +1114,7 @@ class XMLScorer:
# 차트 XML에서 차트제목 추출
if chart_xml is not None:
chart_xml_tree = ET.fromstring(chart_xml)
chart_xml_tree = etree.fromstring(chart_xml)
ns = {'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart',
'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'}
xpath_expr = '/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t'
@@ -1325,12 +1385,12 @@ def main():
exam_types = [
# 'A',
# 'B',
# 'C',
'D',
'C',
# 'D',
]
test_mode = False
# test_mode = True #/TEST 폴더 채점시
# test_mode = False
test_mode = True #/TEST 폴더 채점시
output_excel_paths = []
for exam_type in exam_types: