오탈자 관련 오류 수정

This commit is contained in:
2025-06-26 17:30:18 +09:00
parent 3ecd111ced
commit 6c18fba0cf
9 changed files with 64 additions and 32 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -596,7 +596,7 @@
"value": true, "value": true,
"points": 3, "points": 3,
"category": "Boolean", "category": "Boolean",
"item": "문구 (…지역이 해당하는 티그리스강과…)/이 → 에 글자바꿈" "item": "문구 (…지역이 해당하는 티그리스강과…)/이 → 에 글자바꿈"
}, },
"32": { "32": {
"path": "boolean(//CHAR[contains(translate(text(), ' ', ''),'나는반드')])", "path": "boolean(//CHAR[contains(translate(text(), ' ', ''),'나는반드')])",

View File

@@ -1004,42 +1004,45 @@ class XMLScorer:
return xml_data return xml_data
def typo_check(self, correct_answer_file, user_answer_file, chart_xml): def typo_check(self, correct_answer_file, user_answer_file, chart_xml):
# 문자열 리스트를 필터링
def clean_text_list(text_list, ignore_words=None):
    """Normalize extracted text fragments before they are diffed.

    Each fragment is processed in this order:
      1. every occurrence of ``ignore_words`` is removed (when it is truthy),
      2. all space characters are removed,
      3. list numbering such as ``1.`` and every hyphen are removed.

    Args:
        text_list: An iterable of strings. A single bare string is treated
            as one fragment, and ``None`` is treated as "no fragments".
        ignore_words: Optional substring to strip from every fragment.

    Returns:
        A new list holding the cleaned fragments, in input order.
    """
    if text_list is None:
        # e.g. an element whose ``.text`` is None: nothing to clean.
        return []
    if isinstance(text_list, str):
        # Iterating a bare string yields single characters, which can never
        # match the multi-character "<digits>." pattern below, so treat the
        # whole string as one fragment instead.
        text_list = [text_list]

    result = []
    for text in text_list:
        if ignore_words:
            text = text.replace(ignore_words, '')
        text = text.replace(' ', '')  # strip spaces
        text = re.sub(r'\d+\.\s*|-', '', text)  # strip "N." numbering and hyphens
        result.append(text)
    return result
# 1. 텍스트 추출
# 2. 공백제거, 특정 형식 제거
# 3. 리스트를 문자열로 변환
user_answer_root = ET.parse(user_answer_file).getroot() user_answer_root = ET.parse(user_answer_file).getroot()
correct_answer_root = ET.parse(correct_answer_file).getroot() correct_answer_root = ET.parse(correct_answer_file).getroot()
# xpath로 바이너리 부분추출 # xpath로 바이너리 부분추출
user_input_text = user_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]') user_input_text = user_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
user_table_text = user_answer_root.xpath('//TABLE//CHAR//text()')
user_input_text += user_table_text
correct_input_text = correct_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]') correct_input_text = correct_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
correct_table_text = correct_answer_root.xpath('//TABLE//CHAR//text()')
correct_input_text += correct_table_text
# 차트 XML에서 제목 추출 # 테이블 구간 추출
user_table_text = user_answer_root.xpath('//TABLE//CHAR//text()')
correct_table_text = correct_answer_root.xpath('//TABLE//CHAR//text()')
# 차트 XML에서 차트제목 추출
if chart_xml is not None: if chart_xml is not None:
chart_xml_tree = ET.fromstring(chart_xml) chart_xml_tree = ET.fromstring(chart_xml)
# 차트 제목 추출 # 차트 제목 추출
user_chart_title = chart_xml_tree.xpath('/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t', namespaces={'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart', 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'}) chart_title = chart_xml_tree.xpath('/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t', namespaces={'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart', 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'})
# 차트 제목이 존재하는 경우 # 차트 제목이 존재하는 경우
if user_chart_title: if chart_title:
user_input_text.append(user_chart_title[0].text) user_chart_title = chart_title[0].text
# 차트 제목 정답 텍스트 추출
correct_chart_title = self.scoring_criteria["2"]["50"]["searchValue"] correct_chart_title = self.scoring_criteria["2"]["50"]["searchValue"]
correct_input_text.append(correct_chart_title)
# 각 요소에서 공백 제거
user_input_text = [text.replace(' ', '') for text in user_input_text]
correct_input_text = [text.replace(' ', '') for text in correct_input_text]
# 숫자와 특정 형식 제거 (예: 1., 2., 3., -)
user_input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in user_input_text]
correct_input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in correct_input_text]
try : try :
ignore_word = self.scoring_criteria["2"]["29"]["ignoreWord"] ignore_word = self.scoring_criteria["2"]["29"]["ignoreWord"]
# 특정 단어 제거 # 특정 단어 제거
@@ -1052,20 +1055,49 @@ class XMLScorer:
ignore_word = None ignore_word = None
# print(f"ignore_word: {ignore_word}") # print(f"ignore_word: {ignore_word}")
# 문자열 필터링
correct_input_text = clean_text_list(correct_input_text, ignore_word)
user_input_text = clean_text_list(user_input_text, ignore_word)
correct_table_text = clean_text_list(correct_table_text)
user_table_text = clean_text_list(user_table_text)
correct_chart_title = clean_text_list(correct_chart_title)
user_chart_title = clean_text_list(user_chart_title)
# 리스트를 하나의 문자열로 변경 # 리스트를 하나의 문자열로 변경
correct_input_text_str = ''.join(correct_input_text)
user_input_text_str = ''.join(user_input_text) user_input_text_str = ''.join(user_input_text)
currect_input_text_str = ''.join(correct_input_text)
correct_table_text_str = ''.join(correct_table_text)
user_table_text_str = ''.join(user_table_text)
correct_chart_title_str = ''.join(correct_chart_title)
user_chart_title_str = ''.join(user_chart_title)
print("user_input_text as string:") print("user_input_text as string:")
print(user_input_text_str) print(user_input_text_str)
print("\ncurrect_input_text_answer as string:") print("\n")
print(currect_input_text_str) print("correct_input_text_answer as string:")
print(correct_input_text_str)
# 문자열의 차이를 비교 # 문자열의 차이를 비교
diff = difflib.ndiff(currect_input_text_str, user_input_text_str) text_diff = difflib.ndiff(correct_input_text_str, user_input_text_str)
diff_list = list(diff) table_text_diff = difflib.ndiff(correct_table_text_str, user_table_text_str)
chart_title_diff = difflib.ndiff(correct_chart_title_str, user_chart_title_str)
# text_diff = difflib.ndiff(correct_input_text, user_input_text)
# table_text_diff = difflib.ndiff(correct_table_text, user_table_text)
# chart_title_diff = difflib.ndiff(correct_chart_title, user_chart_title)
# diff_list = list(diff)
text_list = list(text_diff)
table_list = list(table_text_diff)
chart_list = list(chart_title_diff)
diff_list = text_list + table_list + chart_list
# diff_list = text_list + table_list
# 차이점을 정리하여 result_diff에 저장 # 차이점을 정리하여 result_diff에 저장
result_diff = [] result_diff = []
@@ -1275,8 +1307,8 @@ def main():
# 'D', # 'D',
] ]
test_mode = False # test_mode = False
# test_mode = True #/TEST 폴더 채점시 test_mode = True #/TEST 폴더 채점시
output_excel_paths = [] output_excel_paths = []
for exam_type in exam_types: for exam_type in exam_types:

View File

@@ -1 +1 @@
[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"/HWPML/BODY/SECTION/P[19]"},{"kind":2,"language":"xpath","value":"//SECTION"},{"kind":2,"language":"xpath","value":"//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHARSHAPE[@Id=//TEXT[CHAR[text()='지']]/@CharShape]/@Height"},{"kind":2,"language":"xpath","value":"//TEXT[CHAR[text()='지']]/@CharShape"}] [{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and 
//PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"/HWPML/BODY/SECTION/P[19]"},{"kind":2,"language":"xpath","value":"//SECTION"},{"kind":2,"language":"xpath","value":"//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHARSHAPE[@Id=//TEXT[CHAR[text()='지']]/@CharShape]/@Height"},{"kind":2,"language":"xpath","value":"//TABLE//CHAR//text()"}]