# score3.py

import json
import xml.etree.ElementTree as ET
import os
from pathlib import Path
import pandas as pd
from datetime import datetime
from difflib import SequenceMatcher
import re


class XMLScorer:
    """Score XML (HML) documents against criteria loaded from a JSON file.

    The criteria file is expected to decode to a mapping whose values are
    dicts with the keys ``ele`` (element name), ``arg`` (attribute name),
    ``value`` (expected attribute value) and ``points`` (points awarded for
    the criterion); these are the keys read while scoring.
    """

    # A tag must score strictly above this SequenceMatcher ratio to be
    # accepted as a misspelling of the element name being looked for.
    SIMILARITY_THRESHOLD = 0.7

    def __init__(self, scoring_criteria_path):
        """Load the scoring criteria from ``scoring_criteria_path``.

        Args:
            scoring_criteria_path: Path of the UTF-8 encoded JSON criteria file.
        """
        self.scoring_criteria = self._load_scoring_criteria(scoring_criteria_path)
        # Echo the loaded criteria to stdout (behaviour kept from the original).
        print(self.scoring_criteria)

    def _load_scoring_criteria(self, file_path):
        """Read and decode the JSON criteria file at ``file_path``."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _calculate_string_similarity(self, str1, str2):
        """Return the similarity of two strings.

        Args:
            str1 (str): First string.
            str2 (str): Second string.

        Returns:
            float: ``difflib.SequenceMatcher`` ratio, between 0 and 1.
        """
        return SequenceMatcher(None, str1, str2).ratio()

    def _count_differences(self, str1, str2):
        """Count the differences (typos and spacing) between two strings.

        Args:
            str1 (str): Reference string.
            str2 (str): String being compared.

        Returns:
            tuple: ``(total_diff, space_diff)`` where ``total_diff`` is the
            Levenshtein edit distance and ``space_diff`` is the absolute
            difference in the number of space characters.
        """
        space_diff = abs(str1.count(' ') - str2.count(' '))

        # Levenshtein distance using two rolling rows instead of a full
        # (m + 1) x (n + 1) table; the result is the same.
        m, n = len(str1), len(str2)
        previous = list(range(n + 1))
        for i in range(1, m + 1):
            current = [i] + [0] * n
            for j in range(1, n + 1):
                if str1[i - 1] == str2[j - 1]:
                    current[j] = previous[j - 1]
                else:
                    current[j] = min(previous[j], current[j - 1],
                                     previous[j - 1]) + 1
            previous = current

        total_diff = previous[n]
        return total_diff, space_diff

    def _find_similar_element(self, root, target_element):
        """Find the element whose tag most resembles ``target_element``.

        Args:
            root (Element): Root of the XML tree to search (root included).
            target_element (str): Element name being looked for.

        Returns:
            Element: The element with the highest similarity above
            ``SIMILARITY_THRESHOLD`` (the first one in document order on
            ties), or None when no tag is similar enough.
        """
        best_match = None
        best_similarity = self.SIMILARITY_THRESHOLD

        for element in root.iter():
            # Comment / processing-instruction nodes carry a non-string tag
            # and cannot be compared as text.
            if not isinstance(element.tag, str):
                continue
            similarity = self._calculate_string_similarity(
                element.tag, target_element)
            if similarity > best_similarity:
                best_similarity = similarity
                best_match = element

        return best_match

    def _find_element_value(self, root, element_name, attribute_name):
        """Look up an attribute value, tolerating a misspelled element name.

        Args:
            root (Element): Root of the XML tree.
            element_name (str): Element name to look for.
            attribute_name (str): Attribute to read from that element.

        Returns:
            tuple: ``(value, has_typo)``. ``value`` is the attribute value, or
            None when no element was found or it lacks the attribute.
            ``has_typo`` is True when the element used has a tag different
            from ``element_name``.
        """
        # Exact match among the descendants first.
        element = root.find(f".//{element_name}")

        # Otherwise fall back to the most similar tag anywhere in the tree.
        if element is None:
            element = self._find_similar_element(root, element_name)

        if element is None:
            return None, False

        return element.get(attribute_name), element.tag != element_name

    def _score_criterion(self, root, criterion):
        """Score one criterion against the tree and return its match record."""
        element_name = criterion['ele']
        attribute_name = criterion['arg']
        expected_value = criterion['value']
        points = criterion['points']

        actual_value, has_element_typo = self._find_element_value(
            root, element_name, attribute_name)

        match = {
            'criterion': f"{element_name}.{attribute_name}",  # scored item
            'expected': expected_value,  # expected value as given in the criteria
            'actual': actual_value,  # value found in the document
            'points': 0,  # points earned
            'deductions': []  # deduction notes for this criterion
        }

        # No attribute value found: the criterion earns nothing.
        if actual_value is None:
            return match

        # Start from full marks, then subtract deductions.
        match['points'] = points

        if has_element_typo:
            deduction = 1
            match['points'] -= deduction
            match['deductions'].append(
                f"요소 이름 오탈자 감점: -{deduction}점")

        # Attribute values are always text, while a JSON criterion may hold a
        # number; compare in text form so such criteria do not crash.
        if isinstance(expected_value, str):
            expected_text = expected_value
        else:
            expected_text = str(expected_value)

        if actual_value != expected_text:
            total_diff, space_diff = self._count_differences(
                expected_text, actual_value)

            # One point off per unit of spacing difference.
            if space_diff > 0:
                match['points'] -= space_diff
                match['deductions'].append(
                    f"띄어쓰기 오류 감점: -{space_diff}점")

            # One point off for each remaining (non-spacing) edit.
            char_diff = total_diff - space_diff
            if char_diff > 0:
                match['points'] -= char_diff
                match['deductions'].append(
                    f"글자 오류 감점: -{char_diff}점")

        # Never let a criterion go below zero.
        match['points'] = max(0, match['points'])
        return match

    def score_xml_file(self, xml_path):
        """Score a single XML file against every loaded criterion.

        Args:
            xml_path: Path of the XML file to score.

        Returns:
            dict: On success, ``filename``, ``criteria_matches`` (one record
            per criterion), ``total_score`` and ``deductions`` (a file-level
            list that this method leaves empty). When the file is not
            well-formed XML, a dict with ``filename``, ``error`` and a
            ``total_score`` of 0. Only ``ET.ParseError`` is converted this
            way; other exceptions propagate to the caller.
        """
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError as e:
            return {
                'filename': os.path.basename(xml_path),
                'error': f"XML 파싱 오류: {str(e)}",
                'total_score': 0
            }

        results = {
            'filename': os.path.basename(xml_path),
            'criteria_matches': [],  # per-criterion results
            'total_score': 0,
            'deductions': []  # file-level deduction list (not populated here)
        }

        for criterion in self.scoring_criteria.values():
            results['criteria_matches'].append(
                self._score_criterion(root, criterion))

        results['total_score'] = sum(
            match['points'] for match in results['criteria_matches'])
        return results

    def export_to_excel(self, results, output_path=None):
        """Write scoring results to an Excel workbook with two sheets.

        Args:
            results: Iterable of result dicts as returned by
                ``score_xml_file``.
            output_path: Destination path. When None, a name of the form
                ``scoring_results_<YYYYmmdd_HHMMSS>.xlsx`` is generated.

        Returns:
            The path the workbook was written to.
        """
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = f"scoring_results_{timestamp}.xlsx"

        summary_data = []
        detail_data = []

        for result in results:
            # Summary sheet: one row per file.
            summary_row = {
                '파일명': result['filename'],
                '총점': result.get('total_score', 0)
            }
            if 'error' in result:
                summary_row['오류'] = result['error']
            summary_data.append(summary_row)

            # Detail sheet: one row per criterion (error results have none).
            for match in result.get('criteria_matches', []):
                detail_data.append({
                    '파일명': result['filename'],
                    '채점항목': match['criterion'],
                    '기대값': match['expected'],
                    '실제값': match['actual'],
                    '획득점수': match['points'],
                    '감점내역': '; '.join(match.get('deductions', []))
                })

        summary_df = pd.DataFrame(summary_data)
        detail_df = pd.DataFrame(detail_data)

        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
            summary_df.to_excel(writer, sheet_name='채점결과요약', index=False)
            detail_df.to_excel(writer, sheet_name='채점상세내역', index=False)

            # Size each column to its longest cell text plus a small margin.
            for worksheet in writer.sheets.values():
                for column in worksheet.columns:
                    cells = list(column)
                    if not cells:
                        continue
                    max_length = max(len(str(cell.value)) for cell in cells)
                    adjusted_width = max_length + 2
                    worksheet.column_dimensions[
                        cells[0].column_letter].width = adjusted_width

        return output_path

    def score_directory(self, xml_directory, pattern='*.hml'):
        """Score every file in ``xml_directory`` whose name matches ``pattern``.

        Args:
            xml_directory: Directory to scan (not recursive).
            pattern: Glob pattern selecting the files to score; defaults to
                ``'*.hml'``.

        Returns:
            list: One result dict per matching file, ordered by path so the
            output does not depend on filesystem listing order.
        """
        return [
            self.score_xml_file(str(xml_file))
            for xml_file in sorted(Path(xml_directory).glob(pattern))
        ]
    
# 사용 예시
def main(scoring_criteria_path="scoring_criteria.json",
         xml_directory=r"C:\Users\dra\project\HWP-Scoring\output"):
    """Score every document in a directory, print the results and export them.

    Args:
        scoring_criteria_path: Path of the JSON criteria file. Defaults to
            the file name the script originally hard-coded.
        xml_directory: Directory holding the files to score. Defaults to the
            directory the script originally hard-coded.
    """
    scorer = XMLScorer(scoring_criteria_path)

    # Score every matching file found in the directory.
    results = scorer.score_directory(xml_directory)

    # Print a per-file report to stdout.
    for result in results:
        print(f"\n파일: {result['filename']}")
        if 'error' in result:
            # Files that failed to parse carry no per-criterion details.
            print(f"오류: {result['error']}")
            continue

        print(f"총점: {result['total_score']}")
        print("\n채점 세부사항:")
        for match in result['criteria_matches']:
            print(f"기준: {match['criterion']}")
            print(f"기대값: {match['expected']}")
            print(f"실제값: {match['actual']}")
            print(f"획득 점수: {match['points']}")
            print("---")

    # Save the results to an Excel workbook and report where it was written.
    excel_path = scorer.export_to_excel(results)
    print(f"\n채점 결과가 다음 경로에 저장되었습니다: {excel_path}")

# Run the scoring workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00			`import json`
			`import xml.etree.ElementTree as ET`
			`import os`
			`from pathlib import Path`
			`import pandas as pd`
			`from datetime import datetime`
			`from difflib import SequenceMatcher`
			`import re`
차트 없는 친구들 xml파일 생성안되고 채점 상세 내역에 추가되지 않음 2025-01-17 19:06:25 +09:00
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00
			`class XMLScorer:`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`# 채점 기준 경로 초기화`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00			`def __init__(self, scoring_criteria_path):`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`# 채점 기준 로드`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00			`self.scoring_criteria = self._load_scoring_criteria(scoring_criteria_path)`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`print(self.scoring_criteria)`

			`# 채점 기준파일 로드(JSON 파일)`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00			`def _load_scoring_criteria(self, file_path):`
			`with open(file_path, 'r', encoding='utf-8') as f:`
			`return json.load(f)`

			`def _calculate_string_similarity(self, str1, str2):`
			`"""`
			`두 문자열 간의 유사도를 계산합니다.`

			`Args:`
			`str1 (str): 첫 번째 문자열`
			`str2 (str): 두 번째 문자열`

			`Returns:`
			`float: 유사도 (0~1 사이의 값)`
			`"""`
			`return SequenceMatcher(None, str1, str2).ratio()`

			`def _count_differences(self, str1, str2):`
			`"""`
			`두 문자열 간의 차이(오탈자, 띄어쓰기)를 계산합니다.`

			`Args:`
			`str1 (str): 첫 번째 문자열 (기준값)`
			`str2 (str): 두 번째 문자열 (비교값)`

			`Returns:`
			`tuple: (전체 차이 개수, 띄어쓰기 차이 개수)`
			`"""`
			`# 띄어쓰기 차이 계산`
			`space_diff = abs(str1.count(' ') - str2.count(' '))`

			`# 전체 글자 차이 계산 (Levenshtein 거리 기반)`
			`total_diff = 0`
			`m, n = len(str1), len(str2)`
			`dp = [[0] * (n + 1) for _ in range(m + 1)]`

			`for i in range(m + 1):`
			`dp[i][0] = i`
			`for j in range(n + 1):`
			`dp[0][j] = j`

			`for i in range(1, m + 1):`
			`for j in range(1, n + 1):`
			`if str1[i-1] == str2[j-1]:`
			`dp[i][j] = dp[i-1][j-1]`
			`else:`
			`dp[i][j] = min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1]) + 1`

			`total_diff = dp[m][n]`

			`return total_diff, space_diff`

			`def _find_similar_element(self, root, target_element):`
			`"""`
			`유사한 요소를 찾습니다. 완전 일치하지 않더라도 비슷한 이름의 요소를 찾습니다.`

			`Args:`
			`root (Element): XML 루트 요소`
			`target_element (str): 찾고자 하는 요소 이름`

			`Returns:`
			`Element: 가장 유사한 요소 또는 None`
			`"""`
			`best_match = None`
			`best_similarity = 0.7 # 최소 유사도 임계값`

			`for element in root.iter():`
			`similarity = self._calculate_string_similarity(element.tag, target_element)`
			`if similarity > best_similarity:`
			`best_similarity = similarity`
			`best_match = element`

			`return best_match`

			`def _find_element_value(self, root, element_name, attribute_name):`
			`"""`
			`XML에서 특정 요소와 속성값을 찾습니다. 유사한 요소도 고려합니다.`

			`Args:`
			`root (Element): XML 루트 요소`
			`element_name (str): 찾을 요소 이름`
			`attribute_name (str): 찾을 속성 이름`

			`Returns:`
			`tuple: (찾은 속성값 또는 None, 요소 이름 오탈자 여부)`
			`"""`
			`# 정확한 요소 찾기`
			`element = root.find(f".//{element_name}")`

			`# 정확한 요소가 없으면 유사한 요소 찾기`
			`if element is None:`
			`element = self._find_similar_element(root, element_name)`

			`if element is not None:`
			`# 속성값 찾기`
			`value = element.get(attribute_name)`
			`# 요소 이름이 정확히 일치하는지 확인`
			`has_typo = element.tag != element_name`
			`return value, has_typo`

			`return None, False`

1번문항채점가능 2025-01-08 17:43:24 +09:00			`# XML 파일 채점`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00			`def score_xml_file(self, xml_path):`
			`try:`
			`tree = ET.parse(xml_path)`
			`root = tree.getroot()`

			`total_score = 0`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`# 결과값을 Dictionary로 저장`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00			`results = {`
			`'filename': os.path.basename(xml_path),`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`'criteria_matches': [], # 채점 항목별 결과`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00			`'total_score': 0,`
			`'deductions': [] # 감점 상세 내역 추가`
			`}`

			`for criterion_id, criterion in self.scoring_criteria.items():`
			`element_name = criterion['ele']`
			`attribute_name = criterion['arg']`
			`expected_value = criterion['value']`
			`points = criterion['points']`

			`actual_value, has_element_typo = self._find_element_value(`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`root, element_name, attribute_name)`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00
1번문항채점가능 2025-01-08 17:43:24 +09:00			`# 채점 결과 저장`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00			`match = {`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`'criterion': f"{element_name}.{attribute_name}", # 채점 항목`
			`'expected': expected_value, # 기대값`
			`'actual': actual_value, # 실제값`
			`'points': 0, # 획득 점수`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00			`'deductions': [] # 각 기준별 감점 내역`
			`}`

			`if actual_value is not None:`
			`# 기본 점수 부여`
			`match['points'] = points`

			`# 요소 이름에 오탈자가 있는 경우`
			`if has_element_typo:`
			`deduction = 1`
			`match['points'] -= deduction`
			`match['deductions'].append(`
			`f"요소 이름 오탈자 감점: -{deduction}점")`

			`# 속성값 비교 및 차이 계산`
			`if actual_value != expected_value:`
			`total_diff, space_diff = self._count_differences(`
			`expected_value, actual_value)`

			`# 띄어쓰기 차이당 1점 감점`
			`if space_diff > 0:`
			`match['points'] -= space_diff`
			`match['deductions'].append(`
			`f"띄어쓰기 오류 감점: -{space_diff}점")`

			`# 나머지 차이(오탈자)당 1점 감점`
			`char_diff = total_diff - space_diff`
			`if char_diff > 0:`
			`match['points'] -= char_diff`
			`match['deductions'].append(`
			`f"글자 오류 감점: -{char_diff}점")`

			`# 음수 점수 방지`
			`match['points'] = max(0, match['points'])`

			`results['criteria_matches'].append(match)`
			`total_score += match['points']`

			`results['total_score'] = total_score`
			`return results`

			`except ET.ParseError as e:`
			`return {`
			`'filename': os.path.basename(xml_path),`
			`'error': f"XML 파싱 오류: {str(e)}",`
			`'total_score': 0`
			`}`

			`def export_to_excel(self, results, output_path=None):`
			`if output_path is None:`
			`timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")`
			`output_path = f"scoring_results_{timestamp}.xlsx"`

			`summary_data = []`
			`detail_data = []`

			`for result in results:`
			`# 요약 정보`
			`summary_row = {`
			`'파일명': result['filename'],`
			`'총점': result.get('total_score', 0)`
			`}`
			`if 'error' in result:`
			`summary_row['오류'] = result['error']`
			`summary_data.append(summary_row)`

			`# 상세 정보`
			`if 'criteria_matches' in result:`
			`for match in result['criteria_matches']:`
			`detail_row = {`
			`'파일명': result['filename'],`
			`'채점항목': match['criterion'],`
			`'기대값': match['expected'],`
			`'실제값': match['actual'],`
			`'획득점수': match['points'],`
			`'감점내역': '; '.join(match.get('deductions', []))`
			`}`
			`detail_data.append(detail_row)`

			`# DataFrame 생성`
			`summary_df = pd.DataFrame(summary_data)`
			`detail_df = pd.DataFrame(detail_data)`

			`# ExcelWriter 객체 생성`
			`with pd.ExcelWriter(output_path, engine='openpyxl') as writer:`
			`summary_df.to_excel(writer, sheet_name='채점결과요약', index=False)`
			`detail_df.to_excel(writer, sheet_name='채점상세내역', index=False)`

			`# 열 너비 자동 조정`
			`for sheet_name in writer.sheets:`
			`worksheet = writer.sheets[sheet_name]`
			`for column in worksheet.columns:`
			`max_length = 0`
			`column = [cell for cell in column]`
			`for cell in column:`
			`try:`
			`if len(str(cell.value)) > max_length:`
			`max_length = len(str(cell.value))`
			`except:`
			`pass`
			`adjusted_width = (max_length + 2)`
			`worksheet.column_dimensions[column[0].column_letter].width = adjusted_width`

			`return output_path`

			`def score_directory(self, xml_directory):`
			`results = []`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`# xml_files = Path(xml_directory).glob('*.xml')`
			`xml_files = Path(xml_directory).glob('*.hml')`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00
			`for xml_file in xml_files:`
			`result = self.score_xml_file(str(xml_file))`
			`results.append(result)`

			`return results`

			`# 사용 예시`
			`def main():`
			`# 채점기준표 파일 경로`
			`scoring_criteria_path = "scoring_criteria.json"`
			`# XML 파일들이 있는 디렉토리 경로`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`# xml_directory = r"C:\Users\gzero-ser7-win11\Documents\hwpTest\Output"`
			`xml_directory = r"C:\Users\dra\project\HWP-Scoring\output"`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00
			`# 채점기 초기화`
			`scorer = XMLScorer(scoring_criteria_path)`

			`# 디렉토리 내 모든 XML 파일 채점`
			`results = scorer.score_directory(xml_directory)`

			`# 결과 출력`
			`for result in results:`
			`print(f"\n파일: {result['filename']}")`
			`if 'error' in result:`
			`print(f"오류: {result['error']}")`
			`continue`

			`print(f"총점: {result['total_score']}")`
			`print("\n채점 세부사항:")`
			`for match in result['criteria_matches']:`
			`print(f"기준: {match['criterion']}")`
			`print(f"기대값: {match['expected']}")`
			`print(f"실제값: {match['actual']}")`
			`print(f"획득 점수: {match['points']}")`
			`print("---")`

			`# 결과를 엑셀 파일로 저장`
			`excel_path = scorer.export_to_excel(results)`
			`print(f"\n채점 결과가 다음 경로에 저장되었습니다: {excel_path}")`

			`if __name__ == "__main__":`
1번문항채점가능 2025-01-08 17:43:24 +09:00			`main()`
채점용 테스트 코드 추가 2024-11-12 16:29:29 +09:00