diff --git a/250624_DIW_2506_3A_채점결과00.xlsx b/250624_DIW_2506_3A_채점결과00.xlsx index 22d1d8d..d0c15a5 100644 Binary files a/250624_DIW_2506_3A_채점결과00.xlsx and b/250624_DIW_2506_3A_채점결과00.xlsx differ diff --git a/250625_DIW_2506_3A_TEST.xlsx b/250625_DIW_2506_3A_TEST.xlsx new file mode 100644 index 0000000..24298ce Binary files /dev/null and b/250625_DIW_2506_3A_TEST.xlsx differ diff --git a/250626_DIW_2506_3A_TEST.xlsx b/250626_DIW_2506_3A_TEST.xlsx new file mode 100644 index 0000000..06ad17a Binary files /dev/null and b/250626_DIW_2506_3A_TEST.xlsx differ diff --git a/250626_DIW_2506_3A_TEST_000.xlsx b/250626_DIW_2506_3A_TEST_000.xlsx new file mode 100644 index 0000000..e578611 Binary files /dev/null and b/250626_DIW_2506_3A_TEST_000.xlsx differ diff --git a/DIW_2505C.json b/DIW_2505C.json index 634fe12..970cd01 100644 --- a/DIW_2505C.json +++ b/DIW_2505C.json @@ -596,7 +596,7 @@ "value": true, "points": 3, "category": "Boolean", - "item": "문구 (…지역이 해당하는 티그리스강과…)/이 → 에 글자바꿈" + "item": "문구 (…지역이 해당하는 티그리스강과…)/이 → 에 글자바꿈" }, "32": { "path": "boolean(//CHAR[contains(translate(text(), ' ', ''),'나는반드')])", diff --git a/diwScoring2.py b/diwScoring2.py index 1fd8f21..cf58c98 100644 --- a/diwScoring2.py +++ b/diwScoring2.py @@ -1004,42 +1004,45 @@ class XMLScorer: return xml_data def typo_check(self, correct_answer_file, user_answer_file, chart_xml): + + # 문자열 리스트를 필터링 + def clean_text_list(text_list, ignore_words=None): + result = [] + for text in text_list: + if ignore_words: + text = text.replace(ignore_words, '') + text = text.replace(' ', '') # 공백 제거 + text = re.sub(r'\d+\.\s*|-', '', text) # 숫자. / - 제거 + result.append(text) + return result + + # 1. 텍스트 추출 + # 2. 공백제거, 특정 형식 제거 + # 3. 리스트를 문자열로 변환 + user_answer_root = ET.parse(user_answer_file).getroot() correct_answer_root = ET.parse(correct_answer_file).getroot() # xpath로 바이너리 부분추출 user_input_text = user_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]') - user_table_text = user_answer_root.xpath('//TABLE//CHAR//text()') - user_input_text += user_table_text - correct_input_text = correct_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]') - correct_table_text = correct_answer_root.xpath('//TABLE//CHAR//text()') - correct_input_text += correct_table_text - # 차트 XML에서 제목 추출 + # 테이블 구간 추출 + user_table_text = user_answer_root.xpath('//TABLE//CHAR//text()') + correct_table_text = correct_answer_root.xpath('//TABLE//CHAR//text()') + + # 차트 XML에서 차트제목 추출 if chart_xml is not None: chart_xml_tree = ET.fromstring(chart_xml) # 차트 제목 추출 - user_chart_title = chart_xml_tree.xpath('/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t', namespaces={'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart', 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'}) + chart_title = chart_xml_tree.xpath('/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t', namespaces={'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart', 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'}) # 차트 제목이 존재하는 경우 - if user_chart_title: - user_input_text.append(user_chart_title[0].text) - - # 차트 제목 정답 텍스트 추출 + if chart_title: + user_chart_title = chart_title[0].text correct_chart_title = self.scoring_criteria["2"]["50"]["searchValue"] - correct_input_text.append(correct_chart_title) - # 각 요소에서 공백 제거 - user_input_text = [text.replace(' ', '') for text in user_input_text] - correct_input_text = [text.replace(' ', '') for text in correct_input_text] - - - # 숫자와 특정 형식 제거 (예: 1., 2., 3., -) - user_input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in user_input_text] - correct_input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in correct_input_text] - try : ignore_word = self.scoring_criteria["2"]["29"]["ignoreWord"] # 특정 단어 제거 @@ -1052,20 +1055,49 @@ class XMLScorer: ignore_word = None # print(f"ignore_word: {ignore_word}") - + + # 문자열 필터링 + correct_input_text = clean_text_list(correct_input_text, ignore_word) + user_input_text = clean_text_list(user_input_text, ignore_word) + + correct_table_text = clean_text_list(correct_table_text) + user_table_text = clean_text_list(user_table_text) + + correct_chart_title = clean_text_list(correct_chart_title) + user_chart_title = clean_text_list(user_chart_title) + # 리스트를 하나의 문자열로 변경 + correct_input_text_str = ''.join(correct_input_text) user_input_text_str = ''.join(user_input_text) - currect_input_text_str = ''.join(correct_input_text) + correct_table_text_str = ''.join(correct_table_text) + user_table_text_str = ''.join(user_table_text) + + correct_chart_title_str = ''.join(correct_chart_title) + user_chart_title_str = ''.join(user_chart_title) + print("user_input_text as string:") print(user_input_text_str) - print("\ncurrect_input_text_answer as string:") - print(currect_input_text_str) + print("\n") + print("correct_input_text_answer as string:") + print(correct_input_text_str) # 문자열의 차이를 비교 - diff = difflib.ndiff(currect_input_text_str, user_input_text_str) - diff_list = list(diff) - + text_diff = difflib.ndiff(correct_input_text_str, user_input_text_str) + table_text_diff = difflib.ndiff(correct_table_text_str, user_table_text_str) + chart_title_diff = difflib.ndiff(correct_chart_title_str, user_chart_title_str) + + # text_diff = difflib.ndiff(correct_input_text, user_input_text) + # table_text_diff = difflib.ndiff(correct_table_text, user_table_text) + # chart_title_diff = difflib.ndiff(correct_chart_title, user_chart_title) + # diff_list = list(diff) + text_list = list(text_diff) + table_list = list(table_text_diff) + chart_list = list(chart_title_diff) + + diff_list = text_list + table_list + chart_list + # diff_list = text_list + table_list + # 차이점을 정리하여 result_diff에 저장 result_diff = [] @@ -1275,8 +1307,8 @@ def main(): # 'D', ] - test_mode = False - # test_mode = True #/TEST 폴더 채점시 + # test_mode = False + test_mode = True #/TEST 폴더 채점시 output_excel_paths = [] for exam_type in exam_types: diff --git a/zzz.xbook b/zzz.xbook index 50a44cc..c134c20 100644 --- a/zzz.xbook +++ b/zzz.xbook @@ -1 +1 @@ -[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"/HWPML/BODY/SECTION/P[19]"},{"kind":2,"language":"xpath","value":"//SECTION"},{"kind":2,"language":"xpath","value":"//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHARSHAPE[@Id=//TEXT[CHAR[text()='지']]/@CharShape]/@Height"},{"kind":2,"language":"xpath","value":"//TEXT[CHAR[text()='지']]/@CharShape"}] \ No newline at end of file +[{"kind":2,"language":"xpath","value":"//a:t[text()='클라우드 보안투자']/ancestor::a:r//a:ea/@typeface"},{"kind":2,"language":"xpath","value":"boolean(//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕'"},{"kind":2,"language":"xpath","value":"//FONTFACE[@Lang='Hangul']/FONT[@Id=//CHARSHAPE/FONTID/@Hangul]/@Name='바탕' and //CHARSHAPE/@Height='1000' and //PARASHAPE/PARAMARGIN/@LineSpacing='160' and //PARASHAPE/@Align='Justify')"},{"kind":2,"language":"xpath","value":"/HWPML/BODY/SECTION/P[19]"},{"kind":2,"language":"xpath","value":"//SECTION"},{"kind":2,"language":"xpath","value":"//P"},{"kind":2,"language":"xpath","value":"//P[.//FIELDBEGIN[@Type='Hyperlink'] and .//CHAR[contains(., 'http')]]"},{"kind":2,"language":"xpath","value":"//PICTURE[./IMAGE[@BinItem=//BINITEM[@Format='JPG']/@BinData]]/SHAPEOBJECT/POSITION[not(@TreatAsChar='true')]/@HorzOffset"},{"kind":2,"language":"xpath","value":"//CHARSHAPE[@Id=//TEXT[CHAR[text()='지']]/@CharShape]/@Height"},{"kind":2,"language":"xpath","value":"//TABLE//CHAR//text()"}] \ No newline at end of file diff --git a/회차별채점자료/2505/2505확인필요.xlsx b/회차별채점자료/2505/2505확인필요.xlsx index f786852..f6627d4 100644 Binary files a/회차별채점자료/2505/2505확인필요.xlsx and b/회차별채점자료/2505/2505확인필요.xlsx differ diff --git a/회차별채점자료/2506_3/excel_채점기준표/DIW2506_3.xlsx b/회차별채점자료/2506_3/excel_채점기준표/DIW2506_3.xlsx index 2a3fe5b..189062a 100644 Binary files a/회차별채점자료/2506_3/excel_채점기준표/DIW2506_3.xlsx and b/회차별채점자료/2506_3/excel_채점기준표/DIW2506_3.xlsx differ