from datetime import datetime
import difflib
import json
from pathlib import Path
import os
from lxml import etree
import re
from difflib import SequenceMatcher
import pandas as pd
import base64
import math
from itertools import chain

# from xpathSearch import XMLPathHandler

class XMLScorer:
    # 채점 기준 경로 초기화
    def __init__(self, scoring_criteria_path):
        # 채점 기준 로드
        self.scoring_criteria = self._load_scoring_criteria(scoring_criteria_path)
        self.total_score = 0
        self.partial_score = 0
        self.typo_score = 0
        
    def set_typo_score(self, score):
        """Store ``score`` as the typo score later returned by ``get_typo_score``."""
        self.typo_score = score
        
    def get_typo_score(self):
        """Return the value currently held in ``self.typo_score``."""
        return self.typo_score
    
    # 채점 기준파일 로드(JSON 파일)
    def _load_scoring_criteria(self, file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    # mm to pt
    def convert_mm_to_pt(self, mm):
        one_mm_per_pt = 2.83465
        hwp_scale = 100
        pt = math.trunc(mm * one_mm_per_pt * hwp_scale)
        return pt

    def convert_pt_to_mm(self, pt):
        one_mm_per_pt = 2.83465
        hwp_scale = 100
        mm = round(pt / (one_mm_per_pt * hwp_scale), 1)
        return mm
        
    # 유사한 텍스트 찾기
    def find_similar_text(self, root, target_text, xml_type, threshold=0.7):
        """ 
        전체 문서에서 유사한 텍스트를 찾아 반환
        
        Args:
            root (_type_): xml root element 객체
            target_text (_type_): 찾을 텍스트
            threshold (float, optional): 유사도 설정 Defaults to 0.3.

        Returns:
            str: 유사도 기준을 만족하는 텍스트 
        """
        # 전체 텍스트 추출
        # all_text = root.xpath(f"//CHAR/text()")
        # all_text.append(root.xpath(f"//TEXTART/@text"))

        namespaces = {
        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
        'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart'
        }
        
        if xml_type == "hml":
            all_text = root.xpath(f"//BODY//text() | //TEXTART/@Text") if root is not None else []
        
        elif xml_type == "chart":
            all_text = root.xpath(f"//c:chart//text()", namespaces=namespaces) if root is not None else []
        
        else:
            all_text = []

        # 유사도 비교
        max_score = 0
        similar_text = ''
        
        for text in all_text:
            score = SequenceMatcher(None, target_text, text).ratio()
            
            if score > max_score:
                max_score = score
                similar_text = text
                
        if max_score >= threshold:
            return similar_text
        else:
            return target_text
    
    # 정답 비교 및 점수 계산
    def evaluate_answer(self, scoring, user_answer, right_answer, points,
                        method="equal", tolerance=0):
        
        scoring['user_answer'] = user_answer

        is_correct = False

        # 일치 여부 확인
        if method == "equal":
            is_correct = (user_answer == right_answer)
            
        # 정답이 오차범위가 필요한 경우
        elif method == "tolerance":
            if isinstance(user_answer, dict) and isinstance(right_answer, dict):
                is_correct = all(abs(user_answer[k] - right_answer[k]) <= tolerance for k in right_answer)
            else:
                is_correct = abs(user_answer - right_answer) <= tolerance
        
        # 정답이 포함되어 있는 경우
        elif method == "in":
            is_correct = user_answer in right_answer
        
        # 정답을 부분점수로 계산(특수문자, 한자)
        elif method == "partial_score":
            # 부분 점수 계산
            is_correct = isinstance(user_answer, (int, float)) and user_answer <= right_answer
            points = min(points, user_answer)
        else:
            raise ValueError(f"Unknown comparison method: {method}")

        if is_correct:
            scoring['points'] = points
            self.total_score += points
            self.partial_score += points
        else:
            scoring['points'] = 0
            
    # 하나의 XML 파일 채점
    def _score_xml_file(self, xml_file, chart_xml):
        def parse_pages_by_bookmark(root):
            """
            BOOKMARK(Name="Page_X_start" ~ "Page_X_end") 사이의 최상위 블록 요소들을
            페이지 단위로 딕셔너리에 저장하여 XML 구조를 유지합니다.

            Args:
                root: lxml etree의 루트 요소

            Returns:
                dict: 페이지 이름을 키로, 해당 페이지의 lxml 요소 리스트를 값으로 하는 딕셔너리
            """
            pages = {}
            
            # 1. 'SECTION'의 직계 자식들만 가져와 평탄화를 방지합니다.
            #    이것들이 문서의 최상위 구조를 이루는 요소들입니다 (P, TABLE 등).
            all_top_blocks = root.xpath('//SECTION/*')

            current_page = None
            page_start_index = None

            # 2. 이 최상위 블록 리스트를 순회합니다.
            for i, block in enumerate(all_top_blocks):
                # 3. 현재 블록 '내부'에서 시작 북마크를 탐색합니다. ('.//' 사용)
                start_bookmarks = block.xpath('.//BOOKMARK[contains(@Name, "_start")]')
                if start_bookmarks:
                    # 여러 북마크가 있을 수 있으니 첫 번째 것을 기준으로 합니다.
                    name = start_bookmarks[0].get('Name')
                    current_page = name.replace('_start', '')
                    page_start_index = i

                # 4. 현재 페이지를 찾는 중이고, 끝 북마크를 찾았을 경우
                #    f-string을 사용하여 정확한 끝 북마크 이름을 찾습니다.
                if current_page is not None and block.xpath(f'.//BOOKMARK[@Name="{current_page}_end"]'):
                    page_end_index = i
                    
                    # 5. 시작 인덱스부터 끝 인덱스까지 'all_top_blocks' 리스트를 슬라이싱합니다.
                    #    이 슬라이스에는 중첩 구조가 온전히 보존된 최상위 요소들이 담깁니다.
                    page_content = all_top_blocks[page_start_index : page_end_index + 1]
                    pages[current_page] = page_content
                    
                    # 상태 초기화
                    current_page = None
                    page_start_index = None
                    
            return pages
    
        def extract_char_text_from_p(p_element):
            """
            주어진 <P> 요소에서 모든 자손 <CHAR>의 텍스트를 추출해 문자열 리스트로 반환합니다.
            """
            full_text = []
            for p in p_element:
                char_elements = p.xpath('.//CHAR')
                combined_text = ''.join([char.text for char in char_elements if char.text])
                no_space_text = re.sub(r'\s+', '', combined_text)  # 공백 문자 제거
                full_text.append(no_space_text)
            return full_text
        
        try:
            tree = etree.parse(xml_file)
            root = tree.getroot()
            
            # XML문서 페이지 파싱 전처리
            pages = parse_pages_by_bookmark(root)
            # print("🚩Pages : ", pages)
            
            # 네임스페이스 정의
            namespaces = {
            'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
            'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart'
            }
            
            # 차트 XML 파일이 없는 경우 0점 채점을 위헤 빈 XML 생성
            if chart_xml is None:
                chart_tree = etree.fromstring('<xml></xml>')
            else:
                chart_tree = etree.fromstring(chart_xml)
            
            # 결과값을 Dictionary로 저장
            # 하나의 xml파일 = 수험생 한명의 답안지
            onePersonResult = {
                'filename': os.path.basename(xml_file),
                'score_results': [],
                'total_score': 0,
                'partial_scores': []
            }
            print(f"🔜File name: {onePersonResult['filename']}")
            
            self.total_score = 0 
            for section_id, section in self.scoring_criteria.items():
                self.partial_score = 0
                for criterion_id, criterion in section.items():
                    id = criterion_id
                    xpath = criterion.get('path', None)
                    xpath2 = criterion.get('path2', None)
                    xpath3 = criterion.get('path3', None)
                    chart_xpath = criterion.get('chart_xpath', None)
                    search_value = criterion.get('searchValue', None)
                    right_answer = criterion.get('value', None)
                    points = criterion.get('points', 0)
                    category = criterion.get('category', None)
                    item = criterion.get('item', None)
                    option = criterion.get('option', None)
                    ignore_word = criterion.get('ignoreWord', None)
                    similar_text = None
                    
                    # search_value가 있는 경우        
                    if search_value is not None:
                        if xpath or xpath2:
                            similar_text = self.find_similar_text(root, search_value, xml_type="hml")
                            xpath = xpath.replace('{searchValue}', similar_text) if xpath else ""
                            xpath2 = xpath2.replace('{searchValue}', similar_text) if xpath2 else ""
                        if chart_xpath:
                            similar_text = self.find_similar_text(chart_tree, search_value, xml_type="chart")
                            chart_xpath = chart_xpath.replace('{searchValue}', similar_text) if chart_xpath else ""
                    
                    if option:
                        xpath = xpath.replace('{option}', option) if xpath else ""
                        xpath2 = xpath2.replace('{option}', option) if xpath2 else ""
                        chart_xpath = chart_xpath.replace('{option}', option) if chart_xpath else ""
                        
                    if ignore_word:
                        xpath = xpath.replace('{ignoreWord}', ignore_word) if xpath else ""
                    
                        
                    # 문항 별 채점 결과 저장
                    scoring = {
                        'section': section_id,
                        'id': id,
                        'category': category,           # 채점 분류   
                        'item': item,                   # 채점 항목
                        'right_answer': right_answer,   # 정답
                        'user_answer': None,            # 실제 작성 답안
                        'points': 0,                    # 점수
                    }
                    
                    try:
                        has_page2_rectangle = False
                        if "Rectangle" in (category or ""):
                            def has_elements(ptags, xpath):
                                for p in ptags:
                                    element_list = p.xpath(xpath) if xpath else []
                                    if element_list:
                                        return True
                                return False
                            
                            page2_ptags = pages.get('Page_2', [])
                            rectangle_xpath = ".//RECTANGLE"
                            has_page2_rectangle = has_elements(page2_ptags, rectangle_xpath)
                            
                            if not has_page2_rectangle:
                                user_answer = None
                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                                continue
                            else:
                                rect_charshapes = []
                                rect_parashapes = []
                                for p in page2_ptags:
                                    rect_charshapes.extend(p.xpath(".//RECTANGLE//TEXT/@CharShape"))
                                    rect_parashapes.extend(p.xpath(".//RECTANGLE//P/@ParaShape"))
                                    
                        # 머릿말과 관련된 문항에서 1페이지에 머릿말이 없는 경우의 처리
                        # [1-25, 26, 27] 문항 'DIAT' 머릿말 채점시 1페이지에 머릿말이 없으면
                        # 채점하지 않고 0점 처리
                        if "Header" in (category or ""):   
                            def has_elements(ptags, xpath):
                                for p in ptags:
                                    element_list = p.xpath(xpath) if xpath else []
                                    if element_list:
                                        return True
                                return False
                        
                            page1_ptags = pages.get('Page_1', [])
                            header_xpath = ".//HEADER//P"
                            has_page1_header = has_elements(page1_ptags, header_xpath)
                            
                            if not has_page1_header:
                                user_answer = None
                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                                continue
                        
                        if (category or "") == "PageSetting":
                            items = root.xpath(xpath)
                            error_range = criterion.get('tolerance', 0)
                                                        
                            right_answer = {
                                'Top' : float(right_answer.get("Top", 0)),
                                'Bottom' : float(right_answer.get("Bottom", 0)),
                                'Left' : float(right_answer.get("Left", 0)),
                                'Right' : float(right_answer.get("Right", 0)),
                                'Header' : float(right_answer.get("Header", 0)),
                                'Footer' : float(right_answer.get("Footer", 0)),
                                'Gutter' : float(right_answer.get("Gutter", 0)),
                            }
                            right_answer = {
                                k: self.convert_mm_to_pt(v) 
                                for k, v in right_answer.items()
                            }
                            
                            for item in items:
                                user_answer = {
                                    'Top' : float(item.get("Top", 0)),
                                    'Bottom' : float(item.get("Bottom", 0)),
                                    'Left' : float(item.get("Left", 0)),
                                    'Right' : float(item.get("Right", 0)),
                                    'Header' : float(item.get("Header", 0)),
                                    'Footer' : float(item.get("Footer", 0)),
                                    'Gutter' : float(item.get("Gutter", 0)),
                                }

                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="tolerance", tolerance=error_range)

                                if scoring['points'] > 0:
                                    break                            
                                
                        elif (category or "") == "BasicSetting":
                            # FontName, FontSize, Alignment, LineSpacing
                            # 해당 속성의 요소(텍스트)가 문서 내부에 존재하면 정답처리

                            matches = set()

                            # P 태그 순회
                            for p_tag in root.xpath(".//P"):
                                parashape = p_tag.get("ParaShape")
                                
                                for text_tag in p_tag.xpath(".//TEXT"):
                                    charshape = text_tag.get("CharShape")
                                    
                                    if parashape is not None and charshape is not None:
                                        matches.add((parashape, charshape))

                            # 출력
                            for para, char in matches:
                                # print(f"ParaShape = {para}, CharShape = {char}")
                                font_id = root.xpath(f"//CHARSHAPE[@Id='{char}']/FONTID/@Hangul")
                                font_name = root.xpath(f"//FONTFACE[@Lang='Hangul']/FONT[@Id='{font_id[0]}']/@Name")
                                
                                user_answer = {
                                    'FontName': font_name[0],
                                    'FontSize': root.xpath(f"//CHARSHAPE[@Id='{char}']/@Height")[0],
                                    'Alignment': root.xpath(f"//PARASHAPE[@Id='{para}']/@Align")[0],
                                    'LineSpacing': root.xpath(f"//PARASHAPE[@Id='{para}']/PARAMARGIN/@LineSpacing")[0]
                                }
                                
                                # 정답과 수험자 답안 비교
                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                                
                                if scoring['points'] > 0:
                                    break
                        
                        # 1, 2페이지 모두 정답이어야 함
                        elif (category or "") == "PageNumber":
                            items = root.xpath(xpath) if xpath else []
                                                    
                            all_match = True
                            for item in chain(items):                    
                                user_answer = item
                                if right_answer != user_answer:
                                    all_match = False
                                    break
                            
                            if all_match:
                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                            else:
                                self.evaluate_answer(scoring, user_answer, right_answer, 0, method="equal")
                                
                        
                        # 오타 감점 부분은 미리 계산 하고, 이후 점수만 계산
                        elif (category or "") == "오타감점":
                            points = self.get_typo_score()
                            self.total_score += points
                            self.partial_score += points
                            scoring['points'] = points

                        # 테이블의 경우 모든 셀에 요구사항이 적용되어야 정답처리
                        elif (category or "") == "TableAnswer":
                            items = root.xpath(xpath) if xpath else []
                            items2 = root.xpath(xpath2) if xpath2 else []
                            
                            def is_all_match(item_list):
                                return item_list and all(item == right_answer for item in item_list)
                                ## 위 코드와 동일한 기능(풀어서 설명)
                                # 리스트가 비어 있으면 False 반환
                                # if not item_list:
                                #     return False

                                # # 리스트의 모든 항목이 right_answer와 같은지 검사
                                # for item in item_list:
                                #     if item != right_answer:
                                #         return False  # 하나라도 다르면 False 반환

                                # return True  # 전부 일치하면 True 반환

                            if is_all_match(items):
                                user_answer = right_answer
                            elif is_all_match(items2):
                                user_answer = right_answer
                            else:
                                user_answer = ""
                                points = 0
                                
                            self.evaluate_answer(scoring, user_answer, right_answer, points)

                        # [1-16] ◈ 행사안내 ◈ 
                        # 특수문자와 글자의 속성이 같고 문서 내부에 '행사안내'와 같은 문자가 있을 경우
                        # 유사도 문제로 의도치 않은 다른 부분의 텍스트 속성이 채점되는것을 방지하고자
                        # 해당 문자를 포함하는 모든 문단의 속성을 판단해
                        # 정렬값이 정답과 일치하는 경우 정답으로 채점
                        elif (category or "") == "Align":
                            match_str = criterion.get('match_str', None)
                            
                            xpath = xpath.replace('{match_str}', match_str)
                            items = root.xpath(xpath)
                            
                            for item in items:
                                user_answer = item
                                self.evaluate_answer(scoring, user_answer, right_answer, points)
                                if scoring['points'] > 0:
                                    break
                        
                        elif (category or "") == "majorGridlines":
                            # 줄/칸 전환여부 확인
                            # table_col_count = root.xpath("//SECTION[2]//TABLE/@ColCount")
                            table_col_count = root.xpath("//TABLE/@ColCount")
                            
                            # print("🟡테이블 열 개수: ", int(table_col_count[0]) if table_col_count else 0)
                            
                            chart_ser_count = chart_tree.xpath("count(//c:ser)", namespaces=namespaces) if chart_xpath else 0
                            
                            # print("🟡차트 데이터 개수: ", int(chart_ser_count) if isinstance(chart_ser_count, (int, float)) else 0)
                            
                            isXYtransposed = False
                            if table_col_count and chart_ser_count:
                                if int(chart_ser_count) > int(table_col_count[0])-1:
                                    isXYtransposed = True
                                
                            # 값 축 주눈금선 존재하는지 여부 확인
                            items = chart_tree.xpath(chart_xpath, namespaces=namespaces) if chart_xpath else []
                            
                            for item in items:
                                # item이 존재하면 True, 없으면 False
                                user_answer = (item is not None) and isXYtransposed
                                
                                # 정답과 수험자 답안 비교
                                self.evaluate_answer(scoring, user_answer, right_answer, points)
                                if scoring['points'] > 0:
                                    break
                        
                        # 글상자 정렬 [2-10] 문항
                        # 2페이지의 글상자의 ParaShape ID를 동적으로 찾아서 채점
                        elif "TextBoxAlign" in (category or ""):
                            if has_page2_rectangle:
                                parashape_list = rect_parashapes
                            else:
                                parashape_list = root.xpath(xpath)
                            
                            # parashape ID가 있는 경우에만 xpath 치환 & 실행
                            result_items = []
                            if parashape_list:
                                for parashape_id in parashape_list:
                                    exec_xpath = xpath.replace('{rect_parashape_id}', parashape_id)
                                    items = root.xpath(exec_xpath)
                                    result_items.extend(items)
                            else:
                                # RECTANGLE이 없으면 items는 빈 리스트
                                items = [None]
                                                    
                            for item in items:
                                user_answer = item
                                self.evaluate_answer(scoring, user_answer, right_answer, points)
                                if scoring['points'] > 0:
                                    break
                                
                        # 정답이 하나인 경우
                        # elif (category or "") in ["OneAnswer", "ChartOneAnswer"]:
                        elif "OneAnswer" in (category or ""):             
                            items = []
                            items2 = []        
                            if has_page2_rectangle:
                                for p in page2_ptags:
                                    items.extend(p.xpath(xpath) if xpath else "")
                                    items2.extend(p.xpath(xpath2) if xpath2 else "")
                            else:      
                                items = root.xpath(xpath) if xpath else []
                                items2 = root.xpath(xpath2) if xpath2 else []
                            
                            # 차트 XML에서 정답을 찾는 경우
                            # 차트 종류가 
                            # 세로막대형이면 x축이 카테고리(catAx) y축이 값(valAx)
                            # 가로막대형이면 x축이 값(valAx) y축이 카테고리(catAx)
                            if "ChartOneAnswer" in category:
                                # 하드코딩이라 [2-45문항] 변경시 수정 필요
                                # chart_type = self.scoring_criteria["2"]["45"]["chart_type"].replace(" ","")
                                
                                # chart_type 변수의 경우 45번 문항을 먼저 채점하므로 
                                # xy축의 변경이 필요한 53~58번 문항 채점시에 chart_type변수에 차트모양의 정보는 입력 되어있음
                                
                                # 가로 차트일 경우에만 x축과 y축을 바꿔줌
                                # 세로, 꺾은선, 원형 차트의 경우 그대로 사용
                                if "가로" in chart_type:
                                    if "catAx" in chart_xpath:
                                        chart_xpath = chart_xpath.replace("catAx", "valAx")
                                    elif "valAx" in chart_xpath:
                                        chart_xpath = chart_xpath.replace("valAx", "catAx")
                                
                                # 분산형 차트의 경우 
                                # xml파일 내부에서 x, y축 모두 valAx로 표기되고
                                # valAx의 axPos(축의위치) 속성값으로 축의 방향을 구분함
                                elif "분산형" in chart_type:
                                    if "catAx" in chart_xpath:
                                        # valAx[c:axPos/@val='b'] : 값축의 위치가 bottom (가로,x축)
                                        chart_xpath = chart_xpath.replace("catAx", "valAx[c:axPos/@val='b']")
                                    elif "valAx" in chart_xpath:
                                        # valAx[c:axPos/@val='l'] : 값축의 위치가 left (세로,y축)
                                        chart_xpath = chart_xpath.replace("valAx", "valAx[c:axPos/@val='l']") 
                                    
                            chart_items = chart_tree.xpath(chart_xpath, namespaces=namespaces) if chart_xpath else []
                            
                            for item in chain(items, items2, chart_items):
                                user_answer = item.replace(" ", "") if isinstance(item, str) else item
                                right_answer = right_answer.replace(" ", "")
                                
                                self.evaluate_answer(scoring, user_answer, right_answer, points)
                                if scoring['points'] > 0:
                                    break
                        
                        # [2-6] 테두리 이중실선 1.00mm
                        elif "LineShape" in (category or ""):
                            line_shapes = []
                            if has_page2_rectangle:
                                for p in page2_ptags:
                                    line_shapes.extend(p.xpath(xpath) if xpath else [])
                            else:
                                line_shapes = root.xpath(xpath) if xpath else []
                            
                            user_answer = {
                                'Style': None,
                                'Width': None
                            }
                            
                            for line_shape in line_shapes:
                                style = line_shape.get("Style")
                                width = line_shape.get("Width")
                                
                                user_answer['Style'] = style
                                user_answer['Width'] = width
                                
                                self.evaluate_answer(scoring, user_answer, right_answer, points)
                                if scoring['points'] > 0:
                                    break

                        # 사용자 입력값이 mm단위인 경우
                        # elif (category or "") == "mmSize":
                        elif "mmSize" in (category or ""):
                            items = []
                            if has_page2_rectangle:
                                for p in page2_ptags:
                                    items.extend(p.xpath(xpath))
                                
                            else:
                                items = root.xpath(xpath)
                                
                            # 오차범위 설정
                            # 한글 프로그램 내부에서 드물게 0mm이지만 1pt로 저장되는 경우가 있음
                            # 
                            # XML파일의 요소 옵션값은 내부적으로 1=0.01pt
                            # 이 경우를 대비하여 tolerance를 10으로 설정 (1pt=약0.04mm 만큼의 오차 혀용)
                            error_range = criterion.get('tolerance', 10)
                            
                            # JSON 파일 value키값에 mm나 공백이 입력될 경우 제거
                                # 예) "80.2 mm" >> 80.2 로 변환
                            float_string = right_answer.strip().replace("mm", "")
                            right_answer = self.convert_mm_to_pt(float(float_string))
                            
                            if not items:
                                scoring['points'] = 0
                            else:
                                for item in items:
                                    user_answer = float(item)

                                    self.evaluate_answer(scoring, user_answer, right_answer, points, method="tolerance", tolerance=error_range)
                                    
                                    if scoring['points'] > 0:
                                        break
                                
                        elif (category or "") == "ParaShape":
                            items = root.xpath(xpath)
                            
                            for item in items:
                                user_answer = {
                                    'Left': float(item.get('Left', 0)) / 200,
                                    'Indent': float(item.get('Indent', 0)) / -200,
                                }
                            
                                # 정답과 수험자 답안 비교
                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                            
                                if scoring['points'] > 0:
                                    break       
                        
                        # Boolean 타입 정답인 경우
                        elif (category or "") == "Boolean":
                            items = root.xpath(xpath) if xpath else False
                            items2 = root.xpath(xpath2) if xpath2 else False
                            chart_items = chart_tree.xpath(chart_xpath, namespaces=namespaces) if chart_xpath else False
                                
                            user_answer = bool( items or items2 or chart_items )
                            
                            self.evaluate_answer(scoring, user_answer, right_answer, points)
                                    
                        # 채점기준표 파일에 작성된 rgb값을 그대로 읽어와 HML파일 요소의 int형 rgb값과 비교
                        elif "Color" in (category or ""):
                            items = []
                            items2 = []
                            if has_page2_rectangle:
                                for p in page2_ptags:
                                    items.extend(p.xpath(xpath) if xpath else "")
                                    items2.extend(p.xpath(xpath2) if xpath2 else "")
                                
                            else:      
                                items = root.xpath(xpath) if xpath else []
                                items2 = root.xpath(xpath2) if xpath2 else []
                        
                            rgb_text = right_answer
                            
                            # 정규식을 이용해 숫자만 리스트로 추출
                            numbers = re.findall(r'\d+', rgb_text)
                            r, g, b = map(int, numbers) if len(numbers) == 3 else None
                            
                            # 콤마(,)로 구분된 문자열을 정수형으로 변환
                            # r, g, b = map(int, rgb_text.split(','))
                            
                            rgb_int = (b << 16) + (g << 8) + r 
                            
                            # items, items2를 순차적으로 순회
                            for item in chain(items, items2):
                                user_answer = int(item)
                                self.evaluate_answer(scoring, user_answer, rgb_int, points, method="equal")
                                if scoring['points'] > 0:
                                    break

                        # 문단 첫글자 장식 채점
                        elif (category or "") == "TwoLineSize":
                            items = root.xpath(xpath)
                            error_range = criterion.get('tolerance', 0)
                            for item in items:
                                user_answer = {
                                    "Height": int(item.get('Height', 0)),
                                    "Width": int(item.get('Width', 0))
                                }
                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="tolerance", tolerance=error_range)
                            
                                if scoring['points'] > 0:
                                    break
                        
                        # 폰트명 
                        elif "FontName" in (category or ""):
                            if has_page2_rectangle: 
                                charshape_list = rect_charshapes
                                
                            else:      
                                charshape_list = root.xpath(xpath)
                                
                            # 문자속성이 없는 경우
                            if not charshape_list:
                                user_answer = ""
                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                            else:
                                require_all_match = ("TableFontName" in category)
                                any_match = False
                                all_match = True
                                matched_user_answer = None  # 일치하는 user_answer를 기억

                                for charshape_id in charshape_list:
                                    font_id = root.xpath(f"//CHARSHAPE[@Id='{charshape_id}']/FONTID/@Hangul")
                                    if not font_id:
                                        all_match = False
                                        continue

                                    font_name = root.xpath(f"//FONTFACE[@Lang='Hangul']/FONT[@Id='{font_id[0]}']/@Name")
                                    
                                    if not font_name:
                                        all_match = False
                                        continue

                                    # 공백 제거
                                    user_answer = font_name[0].replace(" ", "")
                                    right_answer = right_answer.replace(" ","")
                                    
                                    # 접두어 제거
                                    if right_answer in ["견고딕", "중고딕"]:
                                        user_answer = user_answer.replace("한양", "")

                                    if user_answer == right_answer:
                                        any_match = True
                                        matched_user_answer = user_answer
                                    else:
                                        all_match = False
                                        if require_all_match:
                                            break

                                if require_all_match:
                                    score = points if all_match else 0
                                    self.evaluate_answer(scoring, user_answer, right_answer, score)
                                else:
                                    score = points if any_match else 0
                                    self.evaluate_answer(scoring, matched_user_answer if any_match else "", right_answer, score)

                        elif "FontSize" in (category or ""):
                            if has_page2_rectangle:
                                charshape_list = rect_charshapes
                                
                            else:
                                charshape_list = root.xpath(xpath)
                            
                            # CharShape ID가 있는 경우에만 xpath 치환 & 실행
                            result_items = []
                            if charshape_list:
                                for charshape_id in charshape_list:
                                    exec_xpath = xpath.replace('{rect_charshape_id}', charshape_id)
                                    items = root.xpath(exec_xpath)
                                    result_items.extend(items)
                                    
                            else:
                                # RECTANGLE이 없으면 items는 빈 리스트
                                items = [None]
                                                    
                            for item in items:
                                user_answer = item
                                self.evaluate_answer(scoring, user_answer, right_answer, points)
                                if scoring['points'] > 0:
                                    break
                            
                                                            
                        # 폰트 속성
                        elif (category or "") == "FontAttribute":
                            # 하이퍼링크 처리
                            
                            # 1. 하이퍼링크를 포함하는 P요소를 가져옴
                            # 2. 그 P요소의 자손 CHAR태그에 있는 텍스트를 하나의 문자열로 변환
                            # 3. P요소의 문자열과 채점하려는 문자열이 일치하는지 확인
                            hyperlink_xpath = criterion.get('hyperlink_ptag', None)
                            hyperlink_ptag = root.xpath(hyperlink_xpath) if hyperlink_xpath else None
                            
                            p_tag_text_list = extract_char_text_from_p(hyperlink_ptag) if hyperlink_ptag else []
                            hyperlink_text = search_value.replace(" ", "") if search_value else ""

                            # search_value가 hyperlink문자열에 포함되어 있는지 확인
                            # search_value가 hyperlink인 경우와 아닌경우를 구분해 채점
                            search_in_hyperlink = False
                            if hyperlink_text and any(hyperlink_text in text for text in p_tag_text_list):
                                search_in_hyperlink = True
                            else:
                                search_in_hyperlink = False
                            
                            # hyperlink가 아닌 경우(일반적인 텍스트 일 경우)
                            # 하이퍼링크를 포함한 P태그가 없거나 search_value값이 하이퍼링크텍스트에 포함되어 있지 않을 경우
                            if not hyperlink_ptag or not search_in_hyperlink:
                                charshape_list = root.xpath(xpath)
                                if not charshape_list:
                                    charshape = None
                                    user_answer = None
                                else:
                                    for charshape in charshape_list:
                                        font_attribute = charshape.find(right_answer)
                                        if font_attribute is not None:
                                            user_answer = font_attribute.tag
                                        else:
                                            user_answer = None
                                
                                        self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")        
                                        
                                        if scoring['points'] > 0:
                                            break

                            # 하이퍼링크인 경우
                            # elif hyperlink_ptag and search_in_hyperlink:
                            else:
                                p_elements = hyperlink_ptag
                                
                                for p in p_elements:
                                    # 수험자가 입력한 텍스트 중 하이퍼링크가 들어간 문단의 모든 텍스트를 가져와
                                    # 채점하고자 하는 (정답) 하이퍼링크 텍스트와 시작 위치를 비교
                                    # (예시)
                                    # [수험자입력] 1. 사전등록 : 서울 국제 도서 박람회 흠페이지(http://www.ind.or.kr) 참조
                                    # [정답] 서울 국제 도서 박람회 흠페이지(http://www.ind.or.kr) 참조
                                    # 수험자 텍스트의 "1. 사전등록" 부분을 제외하고 난 뒤
                                    # 남은 "서울 국제 도서 박람회 흠페이지(http://www.ind.or.kr) 참조"의 정답 부분과 유사도를 비교
                                    
                                    text_list = p.xpath(".//CHAR/text()")
                                    full_text = ''.join(text_list).replace(" ", "")
                                    # print("full_text: ", full_text)
                                    
                                    # 채점하고자 하는 문자열 (search_value)의 첫 문자
                                    first_char = search_value[0]
                                    
                                    # 수험자 답안에서 첫 문자 인덱스 위치
                                    user_answer_first_index = full_text.find(first_char)
                                    
                                    if user_answer_first_index != -1:
                                        # 수험자 답안에서 첫 문자 인덱스 위치부터 search_value 길이만큼 잘라서 비교
                                        trimmed_full_text = full_text[user_answer_first_index:]
                                    else:
                                        trimmed_full_text = full_text
                                    
                                    # 두 문자열의 유사도 계산
                                    similarity = difflib.SequenceMatcher(None, trimmed_full_text, hyperlink_text).ratio()
                                    
                                    # 두 문자열의 유사도에 따라 하이퍼링크 확인
                                    # 유사도가 낮은 경우 오답처리
                                    if similarity < 0.7:
                                        self.evaluate_answer(scoring, user_answer, right_answer, 0, method="equal")
                                    
                                    # 유사도가 높은 경우
                                    else:
                                        inside_field = False
                                        charshape_list = []

                                        for elem in p.iter():
                                            # 시작 지점 확인
                                            # FIELDBEGIN태그와 FIELDEND태그 사이
                                            if elem.tag == "FIELDBEGIN":
                                                inside_field = True
                                            elif elem.tag == "FIELDEND":
                                                inside_field = False
                                                
                                            # 하이퍼링크 텍스트가 CharShape 속성값이 앞의 텍스트와 다른 경우
                                            # http://www.ihd.or.kr 주소가 TEXT 부모태그를 가지는 경우
                                            # [예시]
                                            # <TEXT CharShape="21">
                                            #   <CHAR>http://www.ihd.or.kr)</CHAR>
                                            # </TEXT>
                                            # 해당 부모 TEXT태그의 CharShape속성을 확인
                                            elif inside_field and elem.tag == "TEXT":
                                                charshape = elem.get("CharShape")
                                                print('charshape : ', charshape)
                                                if charshape:
                                                    charshape_list.append(charshape)
                                            
                                            # 하이퍼링크 텍스트가 CharShape 속성값이 앞의 텍스트와 같은 경우
                                            # http://www.ihd.or.kr 주소가 TEXT부모태그 없이 CHAR로만 있는경우
                                            # [예시]
                                            # <CHAR>http://www.ihd.or.kr)</CHAR>
                                            # FIELDBEGIN밖의 TEXT태그의 CharShape속성을 확인해야 한다
                                            elif inside_field and elem.tag == "CHAR":
                                                parent = elem.getparent()
                                                
                                                charshape = parent.get("CharShape")
                                                print('charshape : ', charshape)
                                                if charshape:
                                                    charshape_list.append(charshape)
                                                
                                    
                                        # 하이퍼링크에 해당하는 P태그 내 존재하는 charshape ID값 모두를 비교해 해당 속성(ITALIC, BOLD, UNDERLINE) 확인
                                        # 모든 charshape ID값이 정답과 일치하는 경우에만 점수 부여
                                        all_attributes_match = True
                                        if charshape_list:    
                                            for charshape_id in charshape_list:
                                                charshape = root.xpath(f"//CHARSHAPE[@Id='{charshape_id}']")
                                                
                                                # 속성 태그가 존재하는지 확인
                                                font_attribute = charshape[0].find(right_answer)
                                                if font_attribute is None:
                                                    user_answer = None
                                                    all_attributes_match = False
                                                    break
                                                    
                                                else:
                                                    user_answer = font_attribute.tag
                                        
                                        if all_attributes_match:
                                            self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                                        else:
                                            self.evaluate_answer(scoring, user_answer, right_answer, 0, method="equal")

                        elif (category or "") == "LineSpacing":
                            page1_ptags = pages.get('Page_1', [])
                            
                            # 줄간격이 하나라도 일치하지 않을 경우 오답처리
                            linespacing_match = True
                            for p in page1_ptags:
                                parashape_id = p.get('ParaShape')
                                xpath = xpath.replace('{parashape_id}', parashape_id)
                                linespacing = root.xpath(xpath)
                                user_answer = linespacing[0]
                                
                                # print("🟡줄간격: ", user_answer)
                                if user_answer != right_answer:
                                    linespacing_match = False
                                    break
                            
                            # 문단 첫 글자 크기에 따라 채점 기준 추가 (050624)
                            # 1. 기본 줄간격 160% 일 때 26pt
                            # 2. 해당 문제의 정답 줄간격 (180% = 28pt / 200% = 30pt )
                            # 두 경우의 글자 크기가 아니라면 오답처리
                            firstword = criterion.get('first_word', None)
                            result = root.xpath(f"//CHARSHAPE[@Id=//RECTANGLE//TEXT[CHAR[text()='{firstword}']]/@CharShape]/@Height")
                            firstword_size = result[0] if result else None

                            if (right_answer == '180' and firstword_size not in ['2600', '2800', None]) or (right_answer == '200' and firstword_size not in ['2600', '3000', None]):
                                linespacing_match = False
                            
                            if linespacing_match is True:
                                self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                            else:
                                self.evaluate_answer(scoring, user_answer, right_answer, 0, method="equal")
                            
                        
                        # 특수문자 갯수 채점
                        elif (category or "") == "SpecialChar":
                            ch1 = criterion.get('char1', None)
                            ch2 = criterion.get('char2', None)
                            ch3 = criterion.get('char3', None)
                            xpath = xpath.replace('{char1}', ch1)
                            xpath2 = xpath2.replace('{char2}', ch2)
                            xpath3 = xpath3.replace('{char3}', ch3)
                            ch1_str = root.xpath(xpath)
                            ch2_str = root.xpath(xpath2)
                            ch3_str = root.xpath(xpath3)
                            sum_char = 0
                            
                            # char1 요소에서 특수문자 갯수 세기 (최대 2점)
                            for text in ch1_str or []:
                                ch1_count = text.count(ch1)
                                sum_char += ch1_count
                                if sum_char >= 2:
                                    sum_char = 2
                                    break
                            
                            # char2 요소에서 특수문자 갯수 세기 (최대 1점)
                            # char1과 char2가 다른 경우 (예: ▶ 행사안내 ◀)
                            if (ch1 != ch2) and ch2_str:
                                for text in ch2_str or []:
                                    ch2_count = text.count(ch2)
                                    if ch2_count > 1:
                                        ch2_count = 1
                                    sum_char += ch2_count
                                
                            # char3 요소에서 특수문자 갯수 세기 (최대 1점)
                            if ch3_str:
                                for text in ch3_str or []:
                                    ch3_count = text.count(ch3)
                                    if ch3_count > 1:
                                        ch3_count = 1
                                    sum_char += ch3_count
                                                        
                            user_answer = sum_char
                            
                            self.evaluate_answer(scoring, user_answer, right_answer, points, method="partial_score")
                            
                        # 쪽 테두리 (이중 실선, 머리말 포함) 설정
                        elif (category or "") == "PageBorder":
                            user_answer = {
                                "header_inside": False,
                                "all_double_slim": False
                            }
                            
                            # 머릿말 포함 객체가 하나라도 있으면 정답
                            header_inside_elements = root.xpath(xpath)
                            for header_inside in header_inside_elements:
                                # print("머릿말포함: ",header_inside)
                                if "true" in header_inside:
                                    user_answer["header_inside"] = True
                                    break
                            
                            # BORDERFILL요소의 자녀
                            # LEFTBORDER, RIGHTBORDER, TOPBORDER, BOTTOMBORDER 요소의 Type속성이 
                            # 모두 DoubleSlim이면 정답
                            border_tags = ["LEFTBORDER", "RIGHTBORDER", "TOPBORDER", "BOTTOMBORDER"]
                            
                            borderfill_elements = root.xpath(xpath2)
                            for borderfill in borderfill_elements:
                                all_double_slim = True
                                
                                for tag in border_tags:
                                    element = borderfill.find(tag)
                                    
                                    if (element is None) or (element.get("Type") != "DoubleSlim"):
                                        all_double_slim = False
                                        break

                                #모든 BORDER 태그의 Type 속성이 'DoubleSlim'인 객체가 있다면 반복문 탈출
                                if all_double_slim:
                                    user_answer["all_double_slim"] = True
                                    break

                            self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")

                        # 다단 확인 [2-3]문항
                        elif (category or "") == "TwoColumn":
                            page2_ptags = pages.get('Page_2', [])
                            
                            for p in page2_ptags:
                                column_count = p.xpath(xpath)
                                user_answer = column_count[0] if column_count else '0'
                                
                                if user_answer == right_answer:
                                    self.evaluate_answer(scoring, user_answer, right_answer, points, method="equal")
                                    
                                    # P태그들 중 하나라도 다단이 존재할 경우 정답처리
                                    if scoring['points'] > 0:
                                        break
                                    
                        # 한자
                        elif (category or "") == "Hanja":                        
                            # 점수 계산
                            score = 0
                            max_score = points
                            
                            word_list = criterion.get('word', [])
                            # 부분점수 (최대점수에서 한자 갯수만큼 나눈 몫)
                            score_per_pair = max_score // len(word_list)
                            
                            # 한자가 5개 고정일 경우
                            # score_per_pair = 2

                            for kor, chn in word_list:
                                # XPath 구문 구성 및 실행
                                exec_xpath = xpath.replace('{kor}', kor).replace('{chn}', chn)
                                matched = root.xpath(exec_xpath)
                                
                                if matched:
                                    score += score_per_pair

                            # 최대 점수 초과 방지
                            user_answer = min(score, max_score)
                            
                            self.evaluate_answer(scoring, user_answer, right_answer, points, method="partial_score")
                            
                        elif (category or "") == "ChartType":
                            chart_type_list = {
                                '꺾은선형': "//c:lineChart[c:grouping[@val='standard']]",
                                '묶은가로막대형': "//c:barChart[c:barDir[@val='bar'] and c:grouping[@val='clustered']]",
                                '누적가로막대형': "//c:barChart[c:barDir[@val='bar'] and c:grouping[@val='stacked']]",
                                '원뿔형누적가로막대형': "//c:bar3DChart[c:barDir[@val='bar'] and c:grouping[@val='stacked']]",
                                
                                '묶은세로막대형': "//c:barChart[c:barDir[@val='col'] and c:grouping[@val='clustered']]",
                                '누적세로막대형': "//c:barChart[c:barDir[@val='col'] and c:grouping[@val='stacked']]",
                                '원형': "//c:pieChart",
                                '분산형': "//c:scatterChart",
                                '표식만있는분산형': "//c:scatterChart[c:scatterStyle[@val='marker']]",
                            }
                            chart_type = criterion.get('chart_type').replace(" ","")
                            
                            # 입력한 chart_type에 해당하는 xpath를 가져옴
                            chart_xpath = chart_type_list[chart_type]

                            # xpath를 사용하여 차트 요소가 있는지 확인
                            user_answer = bool(chart_tree.xpath(chart_xpath, namespaces=namespaces))
                            self.evaluate_answer(scoring, user_answer, right_answer, points)
                        
                        # 하이퍼링크 채점 [1-30] 문항    
                        elif "hyperlink" in (category or ""):
                            # XPath에서 searchValue가 들어간 CHAR 태그 추출
                            url_tags = root.xpath(xpath) if xpath else []
                            print("hyperlink url_tags:", url_tags)

                            if url_tags is None or len(url_tags) == 0:
                                # 해당하는 CHAR 태그가 없는 경우 오답처리
                                self.evaluate_answer(scoring, False, right_answer, 0)
                                continue
                            
                            has_hyperlink = False  # FIELDBEGIN/FIELDEND 둘 다 존재하는 경우만 True

                            for url_tag in url_tags:
                                if not isinstance(url_tag, etree._Element):
                                    continue
                                char_text = (url_tag.text or "").strip()

                                # 가장 가까운 조상 P태그 찾기
                                p_parent = url_tag.xpath("ancestor::P[1]")
                                if not p_parent:
                                    continue
                                p = p_parent[0]

                                # 같은 P 안에 FIELDBEGIN과 FIELDEND 존재 여부 확인
                                has_fieldbegin = bool(p.xpath(".//FIELDBEGIN"))
                                has_fieldend = bool(p.xpath(".//FIELDEND"))

                                if has_fieldbegin and has_fieldend:
                                    has_hyperlink = True
                                    break

                            # 점수 처리 (하이퍼링크가 하나라도 설정되어 있으면 오답)
                            if not has_hyperlink:
                                self.evaluate_answer(scoring, True, right_answer, points)
                            else:
                                self.evaluate_answer(scoring, False, right_answer, 0)
                            
                    finally:
                        # 문항 채점 결과를 리스트에 입력
                        onePersonResult['score_results'].append(scoring)
                        print(f'scoring: {scoring}')
                    
                onePersonResult['partial_scores'].append({
                    'section': section_id,
                    'score': self.partial_score
                })
            onePersonResult['total_score'] = self.total_score
            return onePersonResult
                
        except etree.ParseError as e:
            return {
                'filename': os.path.basename(xml_file),
                'error': f"XML 파싱 오류: {str(e)}",
                'total_score': 0
            }
            
    def binary_to_chartxml(self, xml_path):
        """Extract the embedded chart XML from a document's OLE binary item.

        The first ``BINDATA`` text node whose ``Id`` is referenced by a
        ``BINITEM`` with ``Format="OLE"`` is base64-decoded. The byte range
        from the first ``<?xml`` marker through the first ``</c:chartSpace>``
        closing tag that follows it is treated as the chart document. That
        range is also written to ``<xml_path without extension>.xml``.

        NOTE(review): if ``xml_path`` itself ends in ``.xml`` the output path
        equals the input path, so the source document is overwritten.

        Args:
            xml_path: Path of the XML document to read (presumably an HML
                file, given the BINITEM/BINDATA layout queried here).

        Returns:
            The chart XML as ``bytes``, or ``None`` when the document has no
            OLE ``BINDATA`` payload or the decoded payload does not contain an
            ``<?xml`` marker followed by ``</c:chartSpace>``.
        """
        root = etree.parse(xml_path).getroot()

        payloads = root.xpath('//BINDATA[@Id=//BINITEM[@Format="OLE"]/@BinData]/text()')
        if not payloads:
            return None
        binary_data = payloads[0].encode('utf-8')

        # Strip any literal <BINDATA ...> / </BINDATA> markup and CRLF line
        # breaks so that only the base64 payload remains.
        encoded_data = re.sub(rb'<BINDATA.*?>', b'', binary_data)
        encoded_data = encoded_data.replace(b'</BINDATA>', b'')
        encoded_data = encoded_data.replace(b'\r\n', b'')

        # '==' is appended before decoding, presumably so payloads stored
        # without their trailing padding still decode.
        decoded_data = base64.b64decode(encoded_data + b'==')

        # Locate the chart document inside the decoded OLE stream. The closing
        # tag is searched for *after* the XML declaration so that a stray
        # closing tag earlier in the stream cannot produce an empty or
        # reversed slice.
        closing_tag = b'</c:chartSpace>'
        start = decoded_data.find(b'<?xml')
        if start == -1:
            return None
        end = decoded_data.find(closing_tag, start)
        if end == -1:
            return None
        xml_data = decoded_data[start:end + len(closing_tag)]

        # Persist the extracted chart XML next to the input file.
        base_filename = os.path.splitext(xml_path)[0]
        new_filename = f'{base_filename}.xml'
        with open(new_filename, 'wb') as file:
            file.write(xml_data)

        return xml_data
    
    def typo_check(self, correct_answer_file, user_answer_file, chart_xml):
        
        # 문자열 리스트를 필터링
        def clean_text_list(text_list, ignore_words=None):
            result = []
            for text in text_list:
                if ignore_words:
                    text = text.replace(ignore_words, '')
                text = text.replace(' ', '')  # 공백 제거
                text = re.sub(r'\d+\.\s*|-', '', text)  # 숫자. / - 제거
                result.append(text)
            return result
        
        # 1. 텍스트 추출
        # 2. 공백제거, 특정 형식 제거
        # 3. 리스트를 문자열로 변환
        
        user_answer_root = etree.parse(user_answer_file).getroot()
        correct_answer_root = etree.parse(correct_answer_file).getroot()
        
        # xpath로 바이너리 부분추출
        user_input_text = user_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
        correct_input_text = correct_answer_root.xpath('//CHAR//text()[not(ancestor::HEADER) and not(ancestor::TABLE)]')
        
        # 테이블 구간 추출
        user_table_text = user_answer_root.xpath('//TABLE//CHAR//text()')
        correct_table_text = correct_answer_root.xpath('//TABLE//CHAR//text()')
        
        user_chart_title = ""
        correct_chart_title = self.scoring_criteria["2"]["50"]["searchValue"]
        
        # 차트 XML에서 차트제목 추출
        if chart_xml is not None:
            chart_xml_tree = etree.fromstring(chart_xml)
            ns = {'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart',
                'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'}
            xpath_expr = '/c:chartSpace/c:chart/c:title/c:tx/c:rich/a:p/a:r/a:t'
            
            # 차트 제목 추출
            chart_title = chart_xml_tree.xpath(xpath_expr, namespaces=ns)
            
            # 차트 제목이 존재하는 경우
            user_chart_title = chart_title[0].text if chart_title else ""

        try :
            ignore_word = self.scoring_criteria["2"]["29"]["ignoreWord"]
            # 특정 단어 제거
            # 오타와 누락의 경우만 판단하면 정상작동하지만
            # 추가 된 단어의 경우를 채점기준에 추가하면 정확하게 채점 되지 않을 수 있음
            # [정답] Hybrid [실제작성] 
            user_input_text = [text.replace(ignore_word, '') for text in user_input_text]
            correct_input_text = [text.replace(ignore_word, '') for text in correct_input_text]
        except (KeyError, IndexError, AttributeError):
            ignore_word = None
        
        # print(f"ignore_word: {ignore_word}")
        
        # 문자열 필터링
        correct_input_text = clean_text_list(correct_input_text, ignore_word)
        user_input_text = clean_text_list(user_input_text, ignore_word)
        
        correct_table_text = clean_text_list(correct_table_text)
        user_table_text = clean_text_list(user_table_text)
        
        correct_chart_title = clean_text_list(correct_chart_title)
        user_chart_title = clean_text_list(user_chart_title)
    
        # 리스트를 하나의 문자열로 변경
        correct_input_text_str = ''.join(correct_input_text)
        user_input_text_str = ''.join(user_input_text)

        correct_table_text_str = ''.join(correct_table_text)
        user_table_text_str = ''.join(user_table_text)
        
        correct_chart_title_str = ''.join(correct_chart_title)
        user_chart_title_str = ''.join(user_chart_title)
        
        print("user_input_text as string:")
        print(user_input_text_str)
        print("\n")
        print("correct_input_text_answer as string:")
        print(correct_input_text_str)
        
        # 문자열의 차이를 비교
        text_diff = difflib.ndiff(correct_input_text_str, user_input_text_str)
        table_text_diff = difflib.ndiff(correct_table_text_str, user_table_text_str)
        chart_title_diff = difflib.ndiff(correct_chart_title_str, user_chart_title_str)
        
        # text_diff = difflib.ndiff(correct_input_text, user_input_text)
        # table_text_diff = difflib.ndiff(correct_table_text, user_table_text)
        # chart_title_diff = difflib.ndiff(correct_chart_title, user_chart_title)
        # diff_list = list(diff)
        text_list = list(text_diff)
        table_list = list(table_text_diff)
        chart_list = list(chart_title_diff)
        
        diff_list = text_list + table_list + chart_list
        # diff_list = text_list + table_list
        
        # 차이점을 정리하여 result_diff에 저장
        result_diff = []
        
        # 누락 된 단어만 따로 리스트로 저장
        missing_list = []
        
        # 오타와 누락된 단어 리스트 저장
        error_missing_list = []
        
        skip_next = False

        for i, line in enumerate(diff_list):
            if skip_next:
                skip_next = False
                continue
            # diff_list의 line 시작이 '-'이면서 다음 line이 '+'이면 두 line을 붙여서 맞춤법이 틀린 단어로 판단
            if line.startswith('- '):
                # 오타
                if i + 1 < len(diff_list) and diff_list[i + 1].startswith('+ '):
                    line = line.replace('- ', '-')
                    next = diff_list[i + 1].replace('+ ', '')
                    result_diff.append(line+'=>'+next)
                    error_missing_list.append(line+'=>'+next)
                    skip_next = True
                # 누락
                else:
                    line = line.replace('- ', '-')
                    result_diff.append(line)
                    missing_list.append(line)
                    error_missing_list.append(line)
            # 없어도 되는 글자가 있는 경우 (추가)
            elif line.startswith('+ '):
                line = line.replace('+ ', '+')
                result_diff.append(line)

        # result_diff 출력
        # print("\nResult Differences:")
        # for diff in result_diff:
        #     print(diff)
        
        # result_diff 배열의 길이를 맨 앞에 저장
        
        # 모든 차이를 계산해 점수 차감
        # temp = 40 - min(len(result_diff)*2, 40)
        
        # 누락된 텍스트만 계산해 점수 차감
        # temp = 40 - min(len(missing_list)*2, 40)
        
        # 2503회 기준 오타 1개당 [2점]->[1점] 차감
        temp = 40 - min(len(error_missing_list)*1, 40)
        
        self.set_typo_score(temp)

        result_diff.insert(0, temp)
        return result_diff
    
    # XML 파일 채점
    def score_directory(self, xml_directory, correct_answer_file):
        # xml 파일 불러오기
        xml_files = Path(xml_directory).glob('*.hml')
    
        # 채점결과 저장할 리스트
        score_results = []
        
        for user_answer_file in xml_files:
            score_result = {}
            chart_xml = self.binary_to_chartxml(user_answer_file)
            score_result['typo'] = self.typo_check(correct_answer_file, user_answer_file, chart_xml)
            score_result['score'] = self._score_xml_file(user_answer_file, chart_xml)
            # score_result['score']['score_results'][2]['points'] = score_result['typo'][0]
            score_results.append(score_result)    
        return score_results

    def parse_filename(self, filename):
        """Split an answer file name into its number and name parts.

        Args:
            filename: File name string, or a dict whose ``'파일명'`` entry holds
                the file name (a missing entry is treated as an empty string).

        Returns:
            tuple: ``(number, name)`` captured from a ``...-<digits>-<name>.hml``
            pattern matched from the start of the string, or ``(None, None)``
            when the pattern does not match.
        """
        raw_name = filename.get('파일명', '') if isinstance(filename, dict) else filename
        found = re.match(r'.*-(\d+)-(.+)\.hml', raw_name)
        if found is None:
            return None, None
        return found.group(1), found.group(2)
    
    def export_to_excel(self, results, output_path=None):
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") #연월일_시분초
            # timestamp = datetime.now().strftime("%Y%m%d") #연월일
            output_path = f"scoring_results_{timestamp}.xlsx"

        summary_data = []
        detail_data = []
        typo_data = []
        
        for temp in results:
            # 요약 정보
            result = temp['score']
            summary_row = {
                '파일명': result['filename'],
                '총점': result.get('total_score', 0)
            }
            if 'error' in result:
                summary_row['오류'] = result['error']
                
            summary_data.append(summary_row)

            # 상세 정보
            if 'score_results' in result:
                filename = {'파일명': result['filename']}
                number, name = self.parse_filename(filename)
                if (number or name) is None:
                    detail_row = {'채점항목': result['filename'] }
                else:
                    detail_row = {'채점항목':f"{number}-{name}"}
                
                section_num = None
                row_index = []
                for i, score_result in enumerate(result['score_results']):
                    current_section = score_result['section']
                    
                    if section_num is None:
                        section_num = current_section
                    
                    # 다음 섹션(문제0 => 문제1)로 넘어갔을 경우 or 마지막 문제일 경우
                    if current_section != section_num:
                        # 이전 섹션의 부분합을 출력
                        detail_row[f'문제{section_num}'] = result['partial_scores'][int(section_num)]['score']
                        row_index.append(f'문제{section_num}')
                        section_num = current_section
                    
                    detail_row[f'{i+1}'] = score_result['points']
                    row_index.append(score_result['id'])
                    
                    # 마지막 섹션(문제2)부분합 점수를 출력
                    if i == len(result['score_results']) - 1:
                        detail_row[f'문제{current_section}'] = result['partial_scores'][int(current_section)]['score']
                        row_index.append(f'문제{current_section}')
                    
                detail_row['총점'] = result.get('total_score', 0)
                row_index.append('총점')
                detail_data.append(detail_row)

        summary_df = pd.DataFrame(summary_data)
        detail_df = pd.DataFrame(detail_data).transpose()
        detail_df.columns = detail_df.iloc[0]
        detail_df = detail_df[1:]
        
        detail_df.index = row_index
        # detail_df = pd.DataFrame(detail_data)
        
        for one_result in results:
            total_typo_err_score = one_result['typo'][0]
            typo_err_list = one_result['typo'][1:]
            
            typo_row = {
                '파일명': one_result['score']['filename'],
                '오타점수': total_typo_err_score,
            }
            typo_row.update({f'오타{i+1}': typo_err for i, typo_err in enumerate(typo_err_list)})
            
            typo_data.append(typo_row)
        
        typo_df = pd.DataFrame(typo_data)
        typo_df = typo_df.transpose()
        # transpose 후 행 -> 열 변환했을 때의 인덱스 제거 (기본 인덱스 제거)
        typo_df.reset_index(drop=True, inplace=True)

        # transpose 했으므로 첫 행을 컬럼명으로 지정
        typo_df.columns = typo_df.iloc[0]   # 첫 행을 컬럼명으로 지정
        typo_df = typo_df.drop(typo_df.index[0])  # 첫 행 제거
        
        
        # ExcelWriter 객체 생성
        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
            detail_df.to_excel(writer, sheet_name='채점상세내역', index=True)
            typo_df.to_excel(writer, sheet_name='오타내역', index=False)
            summary_df.to_excel(writer, sheet_name='채점결과요약', index=False)

            # 열 너비 자동 조정
            # for sheet_name in writer.sheets:
            #     worksheet = writer.sheets[sheet_name]
            #     for column_cells in worksheet.columns:
            #         max_length = 0
            #         column = column_cells[0].column_letter  # 열의 문자
            #         for cell in column_cells:
            #             try:
            #                 if cell.value:
            #                     max_length = max(max_length, len(str(cell.value)))
            #             except:
            #                 pass
            #         adjusted_width = (max_length + 2)
            #         worksheet.column_dimensions[column].width = adjusted_width

        return output_path

def main():
    """Score the configured exam round/types and export one workbook per type.

    For every selected exam type the criteria JSON path, the answer directory
    and the reference answer file are derived from ``exam_round`` and
    ``exam_type``. The directory is scored with ``XMLScorer`` and the results
    are exported to Excel; a type that yields no results is reported and
    skipped. The list of written workbooks is printed at the end.
    """
    # Exam round identifier used in every derived path and file name.
    exam_round = '2601_2'
    # exam_round = '2522'

    # Uncomment the exam types that should be scored.
    exam_types = [
        'A',
        # 'B',
        # 'C',
        # 'D',
    ]

    # True: score the TEST folder; False: score the DIW folder.
    test_mode = True
    # test_mode = False

    written_workbooks = []
    for exam_type in exam_types:
        # Scoring-criteria JSON (e.g. DIW_2503A.json)
        criteria_json = f'./DIW_{exam_round}{exam_type}.json'

        # Directory holding the answer files (e.g. ./output/2503/A/DIW)
        answers_dir = f'./output/{exam_round}/{exam_type}/{"TEST" if test_mode else "DIW"}'

        # Reference answer used for the typo check
        # (e.g. ./output/A/DIW/DIW_2503A.hml)
        # reference_answer = f'./output/{exam_type}/DIW/DIW_{exam_round}{exam_type}.hml'
        reference_answer = f'./output/{exam_round}/{exam_type}/{"TEST" if test_mode else "DIW"}/DIW_{exam_round}{exam_type}.hml'

        # Workbook name (e.g. 241001_DIW_2503A_채점결과.xlsx)
        timestamp = datetime.now().strftime("%y%m%d")
        workbook_name = f'{timestamp}_DIW_{exam_round}{exam_type}_{"TEST" if test_mode else "채점결과"}.xlsx'

        # Score every answer file in the directory with a fresh scorer.
        scorer = XMLScorer(criteria_json)
        results = scorer.score_directory(answers_dir, reference_answer)

        if results:
            # Persist the results and remember where they were written.
            written_workbooks.append(scorer.export_to_excel(results, workbook_name))
        else:
            print(f"❌ 채점 결과가 없습니다. {answers_dir} 폴더에 답안파일이 존재하는지 확인하세요.")

    if written_workbooks:
        print(f"채점 결과 엑셀 파일: {written_workbooks}")

# Run the scoring pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__': 
    main()