1. 2504회 정시 채점자료 업데이트

2. "■ 행사안내 ■"는 특수문자를 제외하고 검색하도록 적용
3. 문서 내 하이퍼링크 텍스트가 존재할 경우 해당 문자열 처리 방법 추가
2025-04-30 17:54:55 +09:00
parent 0cb1cc0b08
commit c7357d088b
33 changed files with 26667 additions and 63 deletions
--- a/diwScoring.py
+++ b/diwScoring.py
@@ -35,7 +35,59 @@ class XMLScorer:
        second_xpath = args[1]
        points = args[2]
        category = args[3]
+        right_answer = args[4]
        
+        if "Hyperlink" in category:
+            is_hyperlink = self.scoring_criteria["1"]["17"]["hyperlink"]
+            hyperlink_xpath = self.scoring_criteria["1"]["17"]["hyperlink_xpath"]
+            right_text = self.scoring_criteria["1"]["17"]["searchValue"].replace(" ","")
+            try:
+                p_elements = root.xpath(is_hyperlink)
+
+                for p in p_elements:
+                    text_list = p.xpath(".//CHAR/text()")
+                    full_text = ''.join(text_list).replace(" ", "")
+                    # right_text의 첫 문자
+                    first_char = right_text[0]
+                    # full_text에서 첫 문자 위치 찾기
+                    index = full_text.find(first_char)
+
+                    if index != -1:
+                        trimmed_full_text = full_text[index:]
+                    else:
+                        trimmed_full_text = full_text  # 일치 문자 없으면 원본 그대로
+                        
+                    similarity = difflib.SequenceMatcher(None, trimmed_full_text, right_text).ratio()
+                    # 두 문자열이 같을 경우만 하이퍼링크 확인
+                    if similarity >= 0.7:
+                        inside_field = False
+                        charshape_values = []
+
+                        for elem in p.iter():
+                            # 시작 지점 확인
+                            if elem.tag == "FIELDBEGIN":
+                                inside_field = True
+                            elif elem.tag == "FIELDEND":
+                                inside_field = False
+                            elif inside_field and elem.tag == "TEXT":
+                                charshape = elem.get("CharShape")
+                                if charshape:
+                                    charshape_values.append(charshape)
+                    
+                        # 하이퍼링크에 해당하는 P태그 내 존재하는 charshape ID값 모두를 비교해 해당 속성(ITALIC, BOLD, UNDERLINE) 확인
+                        if charshape_values:    
+                            for charshape in charshape_values:
+                                result = root.xpath(hyperlink_xpath.replace('{charshape_id}', charshape))
+                                # 해당 속성이 하나라도 적용되어있지 않으면 False 반환
+                                if not result:
+                                    return result
+                            return True
+                    else:
+                        return False
+                    
+            except ET.XPathEvalError as e:
+                return None
+            
        if ("특수문자" in category) and (second_xpath is not None):
            try:
                result = root.xpath(first_xpath)
@@ -51,13 +103,23 @@ class XMLScorer:

            except ET.XPathEvalError as e:
                return None
-            
+        
+        # xpath2가 있는 경우
        elif second_xpath is not None:
            try:
                result1 = root.xpath(first_xpath)
                result2 = root.xpath(second_xpath)
                if (type(result1) is list and len(result1) == 0) and (type(result2) is list and len(result2) == 0):
                    return None
+                
+                # xpath1과 xpath2의 결과값이 모두 리스트인 경우
+                # 두 결과값 중 정답이 포함된 리스트를 반환
+                if type(result1) is list and type(result2) is list:
+                    if right_answer in result1:
+                        return result1
+                    elif right_answer in result2:
+                        return result2    
+                
                return result1 if result1 else result2
            
            except ET.XPathEvalError as e:
@@ -99,11 +161,15 @@ class XMLScorer:
        # all_text.append(root.xpath(f"//TEXTART/@text"))

        namespaces = {
+
        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
        'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart'
        }
        
-        all_text = root.xpath(f"//BODY//text() | //TEXTART/@Text | //c:chart//text()", namespaces=namespaces)
+        if type(root) is str:
+            all_text = root
+        else:
+            all_text = root.xpath(f"//BODY//text() | //TEXTART/@Text | //c:chart//text()", namespaces=namespaces)
        
        # 유사도 비교
        max_score = 0
@@ -163,39 +229,7 @@ class XMLScorer:
                    category = criterion['category'] if 'category' in criterion else None
                    item = criterion['item']
                    similar_text = None
-    
-                    # if "pageSetting" in (format or ""):
-                    #     results = root.xpath("//PAGEMARGIN")
-                    #     matched = False
-                        
-                    #     for i, user_answer in enumerate(results):
-                    #         bottom = int(user_answer.get("Bottom"), 0)
-                    #         footer = int(user_answer.get("Footer"), 0)
-                    #         gutter = int(user_answer.get("Gutter"), 0)
-                    #         header = int(user_answer.get("Header"), 0)
-                    #         left = int(user_answer.get("Left"), 0)
-                    #         right = int(user_answer.get("Right"), 0)
-                    #         top = int(user_answer.get("Top"), 0)
-                    #         error_range = 1
                            
-                    #         if abs(right_answer['Bottom'] - bottom) <= error_range and \
-                    #             abs(right_answer['Footer'] - footer) <= error_range and \
-                    #             abs(right_answer['Gutter'] - gutter) <= error_range and \
-                    #             abs(right_answer['Header'] - header) <= error_range and \
-                    #             abs(right_answer['Left'] - left) <= error_range and \
-                    #             abs(right_answer['Right'] - right) <= error_range and \
-                    #             abs(right_answer['Top'] - top) <= error_range:
-                    #             matched = True
-                                
-                    #             total_score += points
-                    #             partial_score += points
-                    #             scoring['points'] = points
-                    #             break
-                            
-                    #     if not matched:
-                    #         scoring['points'] = 0
-                            
-                        
                    # chart xml 파일에서 채점하는 경우
                    if "chart_xml" in category:
                        if search_value is not None:
@@ -216,7 +250,7 @@ class XMLScorer:
                            else: 
                                xpath = xpath.replace('{searchValue}', similar_text)
                                
-                        result = self.query_xml(root, xpath, xpath2, points, category)
+                        result = self.query_xml(root, xpath, xpath2, points, category, right_answer)

                    # [ boolean 타입 ]
                    # 1. 이텔릭체, 굵게, 밑줄 등 효과가 적용 여부에 따라 
@@ -349,8 +383,9 @@ class XMLScorer:
        correct_input_text = [re.sub(r'\d+\.\s*|-', '', text) for text in correct_input_text]
    
        try :
-            xpath = self.scoring_criteria["2-29"]['path'].split("'")[1]
-            ignore_word = xpath.split("'")[1]
+            # xpath = self.scoring_criteria["2"]["29"]['path'].split("'")[1]
+            # ignore_word = xpath.split("'")[1]
+            ignore_word = self.scoring_criteria["2"]["29"]["ignoreWord"]
            # 특정 단어 제거
            # 오타와 누락의 경우만 판단하면 정상작동하지만
            # 추가 된 단어의 경우를 채점기준에 추가하면 정확하게 채점 되지 않을 수 있음
@@ -561,20 +596,22 @@ def main():

    # 시험회차 및 유형
    exam_round = '2504_3'
+    
+    # 250429기준 없는 시험 형식(A,B,C..)은 주석처리 하지 않으면 오류 발생
    exam_types = [
        # 'A',
-        'B',
+        # 'B',
        'C',
    ]
-    test_mode = False
-    # test_mode = True
+    # test_mode = False
+    test_mode = True
    
    output_excel_paths = []
    for exam_type in exam_types:
        
        # JSON 채점기준표 파일 (예시:DIW_2503A.json)
-        scoring_criteria_path = f'./DIW_{exam_round}.json'
-        # scoring_criteria_path = f'./DIW_{exam_round}{exam_type}.json'
+        # scoring_criteria_path = f'./DIW_{exam_round}.json'
+        scoring_criteria_path = f'./DIW_{exam_round}{exam_type}.json'
        
        # xml(hml)파일 디렉토리 경로 (예시:./output/A/DIW)
        # xml_directory = f'./output/{exam_type}/{"TEST" if test_mode else "DIW"}'