채점용 테스트 코드 추가

2024-11-12 16:29:29 +09:00
parent 5139475864
commit ec3a3ba833
7 changed files with 623 additions and 0 deletions
--- a/score3.py
+++ b/score3.py
@@ -0,0 +1,296 @@
+import json
+import xml.etree.ElementTree as ET
+import os
+from pathlib import Path
+import pandas as pd
+from datetime import datetime
+from difflib import SequenceMatcher
+import re
+
+class XMLScorer:
+    """Score XML files against a JSON table of expected attribute values.
+
+    The criteria file is a JSON object whose values each provide the keys
+    ``ele`` (element tag), ``arg`` (attribute name), ``value`` (expected
+    attribute value) and ``points`` (points awarded for the criterion).
+    """
+
+    # Minimum SequenceMatcher ratio a tag must exceed to be accepted as a
+    # misspelling of the requested element name.
+    MIN_TAG_SIMILARITY = 0.7
+
+    def __init__(self, scoring_criteria_path):
+        """Load the scoring criteria from ``scoring_criteria_path``."""
+        self.scoring_criteria = self._load_scoring_criteria(scoring_criteria_path)
+
+    def _load_scoring_criteria(self, file_path):
+        """Return the parsed content of the UTF-8 JSON file at ``file_path``."""
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+
+    def _calculate_string_similarity(self, str1, str2):
+        """
+        Compute the similarity of two strings.
+
+        Args:
+            str1 (str): First string.
+            str2 (str): Second string.
+
+        Returns:
+            float: ``difflib.SequenceMatcher`` ratio, between 0 and 1.
+        """
+        return SequenceMatcher(None, str1, str2).ratio()
+
+    def _count_differences(self, str1, str2):
+        """
+        Count the differences (typos, spacing) between two strings.
+
+        Args:
+            str1 (str): Reference string.
+            str2 (str): String compared against the reference.
+
+        Returns:
+            tuple: ``(total_diff, space_diff)`` where ``total_diff`` is the
+            Levenshtein distance and ``space_diff`` is the absolute
+            difference in the number of space characters.
+        """
+        space_diff = abs(str1.count(' ') - str2.count(' '))
+
+        # Levenshtein distance with two rolling rows: only the previous row
+        # is ever read, so the full (m+1) x (n+1) matrix is unnecessary.
+        previous = list(range(len(str2) + 1))
+        for i, ch1 in enumerate(str1, start=1):
+            current = [i]
+            for j, ch2 in enumerate(str2, start=1):
+                if ch1 == ch2:
+                    current.append(previous[j - 1])
+                else:
+                    current.append(
+                        min(previous[j], current[j - 1], previous[j - 1]) + 1)
+            previous = current
+
+        return previous[-1], space_diff
+
+    def _find_similar_element(self, root, target_element):
+        """
+        Find the element whose tag is most similar to ``target_element``.
+
+        Args:
+            root (Element): XML root element; it and all descendants are
+                considered.
+            target_element (str): Element name being looked for.
+
+        Returns:
+            Element: The first element with the highest similarity above
+            ``MIN_TAG_SIMILARITY``, or None when no tag exceeds it.
+        """
+        best_match = None
+        best_similarity = self.MIN_TAG_SIMILARITY
+
+        for element in root.iter():
+            # Comment / processing-instruction nodes carry a non-string tag
+            # that cannot be compared as text.
+            if not isinstance(element.tag, str):
+                continue
+            similarity = self._calculate_string_similarity(
+                element.tag, target_element)
+            # Strict comparison keeps the earliest element on ties.
+            if similarity > best_similarity:
+                best_similarity = similarity
+                best_match = element
+
+        return best_match
+
+    def _find_element_value(self, root, element_name, attribute_name):
+        """
+        Look up an attribute value, tolerating a misspelled element name.
+
+        Args:
+            root (Element): XML root element.
+            element_name (str): Element name to look for.
+            attribute_name (str): Attribute name to read.
+
+        Returns:
+            tuple: ``(value, has_typo)``. ``value`` is the attribute value or
+            None when the element or the attribute is missing; ``has_typo``
+            is True when the element was found only through a similar,
+            non-identical tag.
+        """
+        # Exact tag among the descendants first.
+        element = root.find(f".//{element_name}")
+
+        # Otherwise fall back to the closest tag (this pass includes root).
+        if element is None:
+            element = self._find_similar_element(root, element_name)
+
+        if element is None:
+            return None, False
+
+        return element.get(attribute_name), element.tag != element_name
+
+    def _score_criterion(self, root, criterion):
+        """Score one criterion against ``root`` and return its match record.
+
+        The record holds ``criterion``, ``expected``, ``actual``, ``points``
+        and ``deductions``. Points stay 0 when the attribute is not found.
+        """
+        element_name = criterion['ele']
+        attribute_name = criterion['arg']
+        expected_value = criterion['value']
+        points = criterion['points']
+
+        actual_value, has_element_typo = self._find_element_value(
+            root, element_name, attribute_name)
+
+        match = {
+            'criterion': f"{element_name}.{attribute_name}",
+            'expected': expected_value,
+            'actual': actual_value,
+            'points': 0,
+            'deductions': []  # deduction notes for this criterion
+        }
+
+        if actual_value is None:
+            return match
+
+        # Found: start from full points and subtract deductions.
+        if has_element_typo:
+            deduction = 1
+            points -= deduction
+            match['deductions'].append(
+                f"요소 이름 오탈자 감점: -{deduction}점")
+
+        # XML attribute values are always text, while JSON criteria may hold
+        # numbers or booleans; compare against the textual form so such
+        # criteria do not crash the character-level comparison.
+        if isinstance(expected_value, str):
+            expected_text = expected_value
+        else:
+            expected_text = str(expected_value)
+
+        if actual_value != expected_text:
+            total_diff, space_diff = self._count_differences(
+                expected_text, actual_value)
+
+            # One point per differing space.
+            if space_diff > 0:
+                points -= space_diff
+                match['deductions'].append(
+                    f"띄어쓰기 오류 감점: -{space_diff}점")
+
+            # One point per remaining (non-spacing) edit.
+            char_diff = total_diff - space_diff
+            if char_diff > 0:
+                points -= char_diff
+                match['deductions'].append(
+                    f"글자 오류 감점: -{char_diff}점")
+
+        # Never award a negative score.
+        match['points'] = max(0, points)
+        return match
+
+    def score_xml_file(self, xml_path):
+        """Score a single XML file against every loaded criterion.
+
+        Args:
+            xml_path (str): Path of the XML file to score.
+
+        Returns:
+            dict: On success, ``filename``, ``criteria_matches`` (one record
+            per criterion), ``total_score`` and ``deductions`` (every
+            deduction note, prefixed with its criterion). When the file is
+            not well-formed XML, ``filename``, ``error`` and a
+            ``total_score`` of 0.
+        """
+        filename = os.path.basename(xml_path)
+
+        try:
+            root = ET.parse(xml_path).getroot()
+        except ET.ParseError as e:
+            return {
+                'filename': filename,
+                'error': f"XML 파싱 오류: {str(e)}",
+                'total_score': 0
+            }
+
+        results = {
+            'filename': filename,
+            'criteria_matches': [],
+            'total_score': 0,
+            'deductions': []
+        }
+
+        for criterion in self.scoring_criteria.values():
+            match = self._score_criterion(root, criterion)
+            results['criteria_matches'].append(match)
+            results['deductions'].extend(
+                f"{match['criterion']}: {note}"
+                for note in match['deductions'])
+            results['total_score'] += match['points']
+
+        return results
+
+    def export_to_excel(self, results, output_path=None):
+        """Write scoring results to an Excel workbook with two sheets.
+
+        Args:
+            results (list): Result dicts as returned by ``score_xml_file``.
+            output_path (str): Target path; defaults to
+                ``scoring_results_<timestamp>.xlsx`` in the working directory.
+
+        Returns:
+            str: The path the workbook was written to.
+        """
+        if output_path is None:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            output_path = f"scoring_results_{timestamp}.xlsx"
+
+        summary_data = []
+        detail_data = []
+
+        for result in results:
+            # Summary sheet: one row per file.
+            summary_row = {
+                '파일명': result['filename'],
+                '총점': result.get('total_score', 0)
+            }
+            if 'error' in result:
+                summary_row['오류'] = result['error']
+            summary_data.append(summary_row)
+
+            # Detail sheet: one row per criterion (absent for error results).
+            for match in result.get('criteria_matches', []):
+                detail_data.append({
+                    '파일명': result['filename'],
+                    '채점항목': match['criterion'],
+                    '기대값': match['expected'],
+                    '실제값': match['actual'],
+                    '획득점수': match['points'],
+                    '감점내역': '; '.join(match.get('deductions', []))
+                })
+
+        summary_df = pd.DataFrame(summary_data)
+        detail_df = pd.DataFrame(detail_data)
+
+        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
+            summary_df.to_excel(writer, sheet_name='채점결과요약', index=False)
+            detail_df.to_excel(writer, sheet_name='채점상세내역', index=False)
+
+            # Size each column to its longest cell text plus a small margin.
+            for worksheet in writer.sheets.values():
+                for column in worksheet.columns:
+                    cells = tuple(column)
+                    max_length = max(
+                        (len(str(cell.value)) for cell in cells), default=0)
+                    letter = cells[0].column_letter
+                    worksheet.column_dimensions[letter].width = max_length + 2
+
+        return output_path
+
+    def score_directory(self, xml_directory):
+        """Score every ``*.xml`` file directly inside ``xml_directory``.
+
+        Files are processed in sorted path order so repeated runs produce
+        results in the same order.
+
+        Returns:
+            list: One result dict per file, as returned by ``score_xml_file``.
+        """
+        xml_files = sorted(Path(xml_directory).glob('*.xml'))
+        return [self.score_xml_file(str(xml_file)) for xml_file in xml_files]
+    
+
+    
+# 사용 예시
+def main(scoring_criteria_path="scoring_criteria.json",
+         xml_directory=r"C:\Users\gzero-ser7-win11\Documents\hwpTest\Output"):
+    """Score every XML file in a directory, print the results, export to Excel.
+
+    Args:
+        scoring_criteria_path (str): Path of the JSON scoring-criteria file.
+        xml_directory (str): Directory containing the XML files to score.
+            The defaults reproduce the original hard-coded example paths, so
+            calling ``main()`` with no arguments behaves as before.
+    """
+    scorer = XMLScorer(scoring_criteria_path)
+
+    # Score every XML file in the directory.
+    results = scorer.score_directory(xml_directory)
+
+    # Print a per-file report.
+    for result in results:
+        print(f"\n파일: {result['filename']}")
+        if 'error' in result:
+            # Files that failed to parse have no per-criterion details.
+            print(f"오류: {result['error']}")
+            continue
+
+        print(f"총점: {result['total_score']}")
+        print("\n채점 세부사항:")
+        for match in result['criteria_matches']:
+            print(f"기준: {match['criterion']}")
+            print(f"기대값: {match['expected']}")
+            print(f"실제값: {match['actual']}")
+            print(f"획득 점수: {match['points']}")
+            print("---")
+
+    # Save the results to an Excel workbook.
+    excel_path = scorer.export_to_excel(results)
+    print(f"\n채점 결과가 다음 경로에 저장되었습니다: {excel_path}")
+
+# Run the scoring example only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
+
+