오탈자 관련 오류 수정
This commit is contained in:
Binary file not shown.
BIN
250625_DIW_2506_3A_TEST.xlsx
Normal file
BIN
250625_DIW_2506_3A_TEST.xlsx
Normal file
Binary file not shown.
BIN
250626_DIW_2506_3A_TEST.xlsx
Normal file
BIN
250626_DIW_2506_3A_TEST.xlsx
Normal file
Binary file not shown.
BIN
250626_DIW_2506_3A_TEST_000.xlsx
Normal file
BIN
250626_DIW_2506_3A_TEST_000.xlsx
Normal file
Binary file not shown.
@@ -596,7 +596,7 @@
|
|||||||
"value": true,
|
"value": true,
|
||||||
"points": 3,
|
"points": 3,
|
||||||
"category": "Boolean",
|
"category": "Boolean",
|
||||||
"item": "문구 (…지역이 해당하는 티그리스강과…)/이 → 에 글자바꿈"
|
"item": "문구 (…지역이 해당하는 티그리스강과…)/이 → 에 글자바꿈"
|
||||||
},
|
},
|
||||||
"32": {
|
"32": {
|
||||||
"path": "boolean(//CHAR[contains(translate(text(), ' ', ''),'나는반드')])",
|
"path": "boolean(//CHAR[contains(translate(text(), ' ', ''),'나는반드')])",
|
||||||
|
|||||||
@@ -1004,42 +1004,45 @@ class XMLScorer:
|
|||||||
return xml_data
|
return xml_data
|
||||||
|
|
||||||
def typo_check(self, correct_answer_file, user_answer_file, chart_xml):
|
def typo_check(self, correct_answer_file, user_answer_file, chart_xml):
|
||||||
|
|
||||||
|
# 문자열 리스트를 필터링
|
||||||
|
def clean_text_list(text_list, ignore_words=None):
    """Normalize text fragments before they are diffed for typos.

    For every fragment, in this order:
      1. remove each occurrence of ``ignore_words`` (when given),
      2. remove space characters,
      3. remove list numbering such as ``1.`` and every hyphen.

    Args:
        text_list: An iterable of strings. A single string is accepted and
            treated as one fragment; ``None`` is treated as "no text".
        ignore_words: Optional substring to strip from every fragment.
            Falsy values (``None``, ``''``) disable this step.

    Returns:
        A new list with one cleaned string per input fragment.
    """
    if text_list is None:
        # e.g. a chart title element that exists but has no text.
        return []
    if isinstance(text_list, str):
        # A bare string would otherwise be iterated character by character,
        # so a numbered prefix such as "1." could never match the pattern.
        text_list = [text_list]

    # Compiled once instead of being re-resolved for every fragment.
    numbering = re.compile(r'\d+\.\s*|-')

    result = []
    for text in text_list:
        if ignore_words:
            text = text.replace(ignore_words, '')
        text = text.replace(' ', '')  # strip spaces
        text = numbering.sub('', text)  # strip "N." numbering and hyphens
        result.append(text)
    return result
|
||||||
|
|
||||||
|
# 1. 텍스트 추출
|
||||||
|
# 2. 공백제거, 특정 형식 제거
|
||||||
|
# 3. 리스트를 문자열로 변환
|
||||||
|
|
||||||
user_answer_root = ET.parse(user_answer_file).getroot()
|
user_answer_root = ET.parse(user_answer_file).getroot()
|
||||||
correct_answer_root = ET.parse(correct_answer_file).getroot()
|
correct_answer_root = ET.parse(correct_answer_file).getroot()
|
||||||
|
|
||||||
# xpath로 바이너리 부분추출
|
# xpath로 바이너리 부분추출
|
||||||
user_input_text = user_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
|
user_input_text = user_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
|
||||||
user_table_text = user_answer_root.xpath('//TABLE//CHAR//text()')
|
|
||||||
user_input_text += user_table_text
|
|
||||||
|
|
||||||
correct_input_text = correct_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
|
correct_input_text = correct_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
|
||||||
correct_table_text = correct_answer_root.xpath('//TABLE//CHAR//text()')
|
|
||||||
correct_input_text += correct_table_text
|
|
||||||
|
|
||||||
# 차트 XML에서 제목 추출
|
# 테이블 구간 추출
|
||||||
|
user_table_text = user_answer_root.xpath('//TABLE//CHAR//text()')
|
||||||
|
correct_table_text = correct_answer_root.xpath('//TABLE//CHAR//text()')
|
||||||
|
|
||||||
|
# 차트 XML에서 차트제목 추출
|
||||||
if chart_xml is not None:
|
if chart_xml is not None:
|
||||||
chart_xml_tree = ET.fromstring(chart_xml)
|
chart_xml_tree = ET.fromstring(chart_xml)
|
||||||
|
|
||||||
# 차트 제목 추출
|
# 차트 제목 추출
|
||||||
user_chart_title = chart_xml_tree.xpath('/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t', namespaces={'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart', 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'})
|
chart_title = chart_xml_tree.xpath('/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t', namespaces={'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart', 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'})
|
||||||
|
|
||||||
# 차트 제목이 존재하는 경우
|
# 차트 제목이 존재하는 경우
|
||||||
if user_chart_title:
|
if chart_title:
|
||||||
user_input_text.append(user_chart_title[0].text)
|
user_chart_title = chart_title[0].text
|
||||||
|
|
||||||
# 차트 제목 정답 텍스트 추출
|
|
||||||
correct_chart_title = self.scoring_criteria["2"]["50"]["searchValue"]
|
correct_chart_title = self.scoring_criteria["2"]["50"]["searchValue"]
|
||||||
correct_input_text.append(correct_chart_title)
|
|
||||||
|
|
||||||
# 각 요소에서 공백 제거
|
|
||||||
user_input_text = [text.replace(' ', '') for text in user_input_text]
|
|
||||||
correct_input_text = [text.replace(' ', '') for text in correct_input_text]
|
|
||||||
|
|
||||||
|
|
||||||
# 숫자와 특정 형식 제거 (예: 1., 2., 3., -)
|
|
||||||
user_input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in user_input_text]
|
|
||||||
correct_input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in correct_input_text]
|
|
||||||
|
|
||||||
try :
|
try :
|
||||||
ignore_word = self.scoring_criteria["2"]["29"]["ignoreWord"]
|
ignore_word = self.scoring_criteria["2"]["29"]["ignoreWord"]
|
||||||
# 특정 단어 제거
|
# 특정 단어 제거
|
||||||
@@ -1052,20 +1055,49 @@ class XMLScorer:
|
|||||||
ignore_word = None
|
ignore_word = None
|
||||||
|
|
||||||
# print(f"ignore_word: {ignore_word}")
|
# print(f"ignore_word: {ignore_word}")
|
||||||
|
|
||||||
|
# 문자열 필터링
|
||||||
|
correct_input_text = clean_text_list(correct_input_text, ignore_word)
|
||||||
|
user_input_text = clean_text_list(user_input_text, ignore_word)
|
||||||
|
|
||||||
|
correct_table_text = clean_text_list(correct_table_text)
|
||||||
|
user_table_text = clean_text_list(user_table_text)
|
||||||
|
|
||||||
|
correct_chart_title = clean_text_list(correct_chart_title)
|
||||||
|
user_chart_title = clean_text_list(user_chart_title)
|
||||||
|
|
||||||
# 리스트를 하나의 문자열로 변경
|
# 리스트를 하나의 문자열로 변경
|
||||||
|
correct_input_text_str = ''.join(correct_input_text)
|
||||||
user_input_text_str = ''.join(user_input_text)
|
user_input_text_str = ''.join(user_input_text)
|
||||||
currect_input_text_str = ''.join(correct_input_text)
|
|
||||||
|
|
||||||
|
correct_table_text_str = ''.join(correct_table_text)
|
||||||
|
user_table_text_str = ''.join(user_table_text)
|
||||||
|
|
||||||
|
correct_chart_title_str = ''.join(correct_chart_title)
|
||||||
|
user_chart_title_str = ''.join(user_chart_title)
|
||||||
|
|
||||||
print("user_input_text as string:")
|
print("user_input_text as string:")
|
||||||
print(user_input_text_str)
|
print(user_input_text_str)
|
||||||
print("\ncurrect_input_text_answer as string:")
|
print("\n")
|
||||||
print(currect_input_text_str)
|
print("correct_input_text_answer as string:")
|
||||||
|
print(correct_input_text_str)
|
||||||
|
|
||||||
# 문자열의 차이를 비교
|
# 문자열의 차이를 비교
|
||||||
diff = difflib.ndiff(currect_input_text_str, user_input_text_str)
|
text_diff = difflib.ndiff(correct_input_text_str, user_input_text_str)
|
||||||
diff_list = list(diff)
|
table_text_diff = difflib.ndiff(correct_table_text_str, user_table_text_str)
|
||||||
|
chart_title_diff = difflib.ndiff(correct_chart_title_str, user_chart_title_str)
|
||||||
|
|
||||||
|
# text_diff = difflib.ndiff(correct_input_text, user_input_text)
|
||||||
|
# table_text_diff = difflib.ndiff(correct_table_text, user_table_text)
|
||||||
|
# chart_title_diff = difflib.ndiff(correct_chart_title, user_chart_title)
|
||||||
|
# diff_list = list(diff)
|
||||||
|
text_list = list(text_diff)
|
||||||
|
table_list = list(table_text_diff)
|
||||||
|
chart_list = list(chart_title_diff)
|
||||||
|
|
||||||
|
diff_list = text_list + table_list + chart_list
|
||||||
|
# diff_list = text_list + table_list
|
||||||
|
|
||||||
# 차이점을 정리하여 result_diff에 저장
|
# 차이점을 정리하여 result_diff에 저장
|
||||||
result_diff = []
|
result_diff = []
|
||||||
|
|
||||||
@@ -1275,8 +1307,8 @@ def main():
|
|||||||
# 'D',
|
# 'D',
|
||||||
]
|
]
|
||||||
|
|
||||||
test_mode = False
|
# test_mode = False
|
||||||
# test_mode = True #/TEST 폴더 채점시
|
test_mode = True #/TEST 폴더 채점시
|
||||||
|
|
||||||
output_excel_paths = []
|
output_excel_paths = []
|
||||||
for exam_type in exam_types:
|
for exam_type in exam_types:
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"/HWPML/BODY/SECTION/P[19]"},{"kind":2,"language":"xpath","value":"//SECTION"},{"kind":2,"language":"xpath","value":"//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHARSHAPE[@Id=//TEXT[CHAR[text()='지']]/@CharShape]/@Height"},{"kind":2,"language":"xpath","value":"//TEXT[CHAR[text()='지']]/@CharShape"}]
|
[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"/HWPML/BODY/SECTION/P[19]"},{"kind":2,"language":"xpath","value":"//SECTION"},{"kind":2,"language":"xpath","value":"//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHARSHAPE[@Id=//TEXT[CHAR[text()='지']]/@CharShape]/@Height"},{"kind":2,"language":"xpath","value":"//TABLE//CHAR//text()"}]
|
||||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user