1번문항채점가능

2025-01-08 17:43:24 +09:00
parent ec3a3ba833
commit a4b6e22e4f
80 changed files with 1219 additions and 56 deletions
--- a/score5.py
+++ b/score5.py
@@ -0,0 +1,265 @@
+from datetime import datetime
+import json
+import glob
+from pathlib import Path
+import os
+from lxml import etree as ET
+from difflib import SequenceMatcher
+import pandas as pd
+# from xpathSearch import XMLPathHandler
+
+
+class XMLScorer:
+    # 채점 기준 경로 초기화
+    def __init__(self, scoring_criteria_path):
+        # 채점 기준 로드
+        self.scoring_criteria = self._load_scoring_criteria(scoring_criteria_path)
+    
+    # 채점 기준파일 로드(JSON 파일)
+    def _load_scoring_criteria(self, file_path):
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+
+    # XML 파일에서 element의 값을 찾아 반환  
+    def query_xml(self, root, query):
+        try:
+            result = root.xpath(query)
+            if type(result) is list and len(result) == 0:
+                return None
+            return result
+        except ET.XPathEvalError as e:
+            return None
+    
+    # 유사한 텍스트 찾기
+    def find_similar_text(self, root, target_text, threshold=0.3):
+        """ 
+        전체 문서에서 유사한 텍스트를 찾아 반환
+
+        Args:
+            root (_type_): xml root element 객체
+            target_text (_type_): 찾을 텍스트
+            threshold (float, optional): 유사도 설정 Defaults to 0.3.
+
+        Returns:
+            str: 유사도 기준을 만족하는 텍스트 
+        """
+        # 전체 텍스트 추출
+        # all_text = root.xpath(f"//CHAR/text()")
+        # all_text.append(root.xpath(f"//TEXTART/@text"))
+        all_text = root.xpath(f"//CHAR/text() | //TEXTART/@Text")
+        
+        # 유사도 비교
+        max_score = 0
+        similar_text = ''
+        
+        for text in all_text:
+            score = SequenceMatcher(None, target_text, text).ratio()
+            
+            if score > max_score:
+                max_score = score
+                similar_text = text
+                
+        if max_score >= threshold:
+            return similar_text
+        else:
+            return None
+    
+    # 하나의 XML 파일 채점
+    def _score_xml_file(self, xml_path):
+        try:
+            tree = ET.parse(xml_path)
+            root = tree.getroot()
+            
+            total_score = 0
+            
+            # 결과값을 Dictionary로 저장
+            results = {
+                'filename': os.path.basename(xml_path),
+                'score_results': [],
+                'total_score': 0,
+            }
+            
+            print(f"File name: {results['filename']}")
+            
+            for criterion_id, criterion in self.scoring_criteria.items():
+                xpath = criterion['path']
+                search_value = criterion['searchValue']
+                right_answer = criterion['value']
+                points = criterion['points']
+                category = criterion['category']
+                item = criterion['item']
+                
+                simliar_text = None
+                
+                # searchValue가 있을 경우 유사한 텍스트 찾기
+                if search_value is not None:
+                    simliar_text = self.find_similar_text(root, search_value)
+                    if simliar_text is None:
+                        continue                        
+                    else: 
+                        xpath = xpath.replace('{searchValue}', simliar_text)
+
+                # xpath로 실제 작성 답안 찾기
+                result = self.query_xml(root, xpath)
+                    
+                # [ boolean 타입 ]
+                # 1. 이텔릭체, 굵게, 밑줄 등 효과가 적용 여부에 따라 
+                # [ITALIC] [BOLD] [UNDERLINE] 태그가 있거나 없을 수 있으므로
+                # 존재 유무에 따라 True, False로 판단
+                # 2. 두 가지 이상의 조건을 모두 만족해야 하는 경우 and 연산자로 연결되어
+                # 반환값 True/False로 판단
+                # [ float 타입 ]
+                # 1. 부분점수의 합산으로 반환되는 경우 float 타입으로 반환
+                if type(result) is not list:
+                    actual_answer = result
+                else:
+                    actual_answer = result[0]
+                
+                scoring = {
+                    'category': category,  # 채점 분류   
+                    'item': item,  # 채점 항목
+                    'right_answer': right_answer,  # 정답
+                    'actual_answer': actual_answer,  # 실제 작성 답안
+                    'points': 0, 
+                    'deductions': []  # 각 기준별 감점 내역
+                }
+                
+                scoring['points'] = points
+
+                # 점수 차감 조건
+                # 1. 정답이 실수형으로 반환받은 경우는 채점항목의 부분점수 합산 결과이므로
+                # 반환받은 값 그대로를 점수로 사용
+                # 2. 그 외의 경우 정답과 실제 작성 답안이 다른 경우 점수 차감
+                if type(actual_answer) is float:
+                    scoring['points'] = actual_answer        
+                else:
+                    if right_answer != actual_answer:
+                        scoring['points'] -= points
+
+                results['score_results'].append(scoring)
+                total_score += scoring['points']
+                    
+                print(f'scoring: {scoring}')
+                          
+            results['total_score'] = total_score
+            return results
+                
+        except ET.ParseError as e:
+            return {
+                'filename': os.path.basename(xml_path),
+                'error': f"XML 파싱 오류: {str(e)}",
+                'total_score': 0
+            }
+        
+    # XML 파일 채점
+    def score_directory(self, xml_directory):
+        
+        # xml 파일 불러오기
+        xml_files = Path(xml_directory).glob('*.hml')
+    
+        # 결과 저장할 리스트
+        results = []
+    
+        for xml_file in xml_files:
+            result = self._score_xml_file(xml_file)
+            results.append(result)
+            
+        return results
+
+    def export_to_excel(self, results, output_path=None):
+        if output_path is None:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            output_path = f"scoring_results_{timestamp}.xlsx"
+
+        summary_data = []
+        detail_data = []
+
+        for result in results:
+            # 요약 정보
+            summary_row = {
+                '파일명': result['filename'],
+                '총점': result.get('total_score', 0)
+            }
+            if 'error' in result:
+                summary_row['오류'] = result['error']
+                
+            summary_data.append(summary_row)
+
+            # 상세 정보
+            if 'score_results' in result:
+                detail_data.append({'파일명': result['filename']})
+                for scoring in result['score_results']:
+                    # detail_row = {
+                    #     '파일명': result['filename'],
+                    #     # '채점분류': scoring['category'],
+                    #     # '채점항목': scoring['item'],
+                    #     # '채점기준': scoring['right_answer'],
+                    #     # '적용답안': scoring['actual_answer'],
+                    #     '획득점수': scoring['points'],
+                    #     # '감점내역': '; '.join(scoring.get('deductions', []))
+                    # }
+                    # detail_data.append(detail_row)
+
+        summary_df = pd.DataFrame(summary_data)
+        detail_df = pd.DataFrame(detail_data)
+        
+
+        # ExcelWriter 객체 생성
+        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
+            summary_df.to_excel(writer, sheet_name='채점결과요약', index=False)
+            detail_df.to_excel(writer, sheet_name='채점상세내역', index=False)
+
+            # 열 너비 자동 조정
+            for sheet_name in writer.sheets:
+                worksheet = writer.sheets[sheet_name]
+                for column_cells in worksheet.columns:
+                    max_length = 0
+                    column = column_cells[0].column_letter  # 열의 문자
+                    for cell in column_cells:
+                        try:
+                            if cell.value:
+                                max_length = max(max_length, len(str(cell.value)))
+                        except:
+                            pass
+                    adjusted_width = (max_length + 2)
+                    worksheet.column_dimensions[column].width = adjusted_width
+
+        return output_path
+
+
+def main():
+    scoring_criteria_path = r'C:\Users\dra\project\HWP-Scoring\scoring_criteria.json'
+
+    # xml(hml)파일 디렉토리 경로
+    xml_directory = r'C:\Users\dra\project\HWP-Scoring\output'
+
+    # 채점 클래스 초기화
+    scorer = XMLScorer(scoring_criteria_path)
+
+    # 폴더 내 모든 xml 파일 채점
+    results = scorer.score_directory(xml_directory)
+  
+#   for result in results:
+#     print(f"\n파일: {result['filename']}")
+#     if 'error' in result:
+#         print(f"오류: {result['error']}")
+#         continue
+        
+#     print(f"총점: {result['total_score']}")
+#     print("\n채점 세부사항:")
+#     for scoring in result['score_results']:
+#         print(f"채점분류: {scoring['category']}")
+#         print(f"채점항목: {scoring['item']}")
+#         print(f"요구 답안: {scoring['right_answer']}")
+#         print(f"작성 답안: {scoring['actual_answer']}")
+#         print(f"획득 점수: {scoring['points']}")
+#         print(f"감점 내역: {scoring['deductions']}")
+#         print("---")
+  
+    # 채점 결과 엑셀로 저장
+    output_excel_path = scorer.export_to_excel(results)
+    print(f"채점 결과 엑셀 파일: {output_excel_path}")    
+
+if __name__ == '__main__': 
+  main()
+