# xpathSearch.py

from lxml import etree
from difflib import SequenceMatcher
import json

class XMLPathHandler:
    """Load an XML file and build XPath queries from fuzzy attribute matches.

    A configuration item names an element (``ele``), an attribute (``arg``)
    and an approximate attribute value (``searchValue``).  The handler finds
    the attribute value in the document that is most similar to
    ``searchValue`` and uses it to build (or fill in) an XPath expression.
    """

    # Keys every configuration item must provide before an XPath can be built.
    _REQUIRED_KEYS = ('ele', 'arg', 'searchValue')

    def __init__(self, xml_file_path):
        """
        Parse the XML file and keep its tree and root element.
        :param xml_file_path: path of the XML file to load
        """
        self.tree = etree.parse(xml_file_path)
        self.root = self.tree.getroot()

    @staticmethod
    def _xpath_literal(text):
        """
        Render ``text`` as an XPath 1.0 string literal.

        XPath 1.0 has no escape syntax inside string literals, so a value
        holding both quote characters has to be assembled with ``concat()``.
        Values without an apostrophe keep the plain single-quoted form.
        :param text: raw string value
        :return: XPath expression that evaluates to ``text``
        """
        if "'" not in text:
            return f"'{text}'"
        if '"' not in text:
            return f'"{text}"'
        # Split on apostrophes and glue the pieces back with a double-quoted "'".
        pieces = ", \"'\", ".join(f"'{part}'" for part in text.split("'"))
        return f"concat({pieces})"

    def similar(self, a, b):
        """
        Compute the similarity of two strings.
        :return: similarity score (0~1) from ``SequenceMatcher.ratio()``
        """
        return SequenceMatcher(None, a, b).ratio()

    def find_similar_text(self, search_value, element_name, arg_name, threshold=0.8):
        """
        Find the attribute value in the XML that is most similar to a text.
        :param search_value: text to look for
        :param element_name: name of the elements to search
        :param arg_name: name of the attribute to compare
        :param threshold: similarity a candidate must exceed (strictly)
        :return: ``(best_match, best_score)``; ``(None, 0)`` when no candidate
                 scores above the threshold
        """
        # Select every element of the given name that carries the attribute.
        xpath = f"//{element_name}[@{arg_name}]"
        best_match = None
        best_score = 0

        for element in self.root.xpath(xpath):
            attr_value = element.get(arg_name)
            if attr_value is None:
                continue
            score = self.similar(search_value, attr_value)
            # Strict comparisons: the first of several equally good
            # candidates wins, and a score equal to the threshold is rejected.
            if score > threshold and score > best_score:
                best_match = attr_value
                best_score = score

        return best_match, best_score

    def build_xpath(self, item):
        """
        Build an XPath expression from a configuration item.

        Without a ``path`` the result is ``//ele[@arg=<matched>]``.  With a
        ``path``, the placeholders ``[@arg='']`` and ``[@arg='searchValue']``
        inside it are replaced by the predicate for the matched value; a path
        containing neither placeholder is returned unchanged.
        :param item: configuration item (mapping)
        :return: ``(xpath, matched_text, score)``, or ``(None, None, 0)`` when
                 a required key is missing or nothing similar was found
        """
        if not all(key in item for key in self._REQUIRED_KEYS):
            return None, None, 0

        attr = item['arg']

        # Fuzzy search for the closest attribute value in the document.
        matched_text, score = self.find_similar_text(
            item['searchValue'],
            item['ele'],
            attr
        )

        # Compare against None: an empty string is a legitimate match.
        if matched_text is None:
            return None, None, 0

        # Quote the value safely so apostrophes cannot break the expression.
        predicate = f"[@{attr}={self._xpath_literal(matched_text)}]"

        template = item.get('path')
        if template:
            # Fill the matched value into the placeholders of the given path.
            xpath = template.replace(f"[@{attr}='']", predicate)
            xpath = xpath.replace(f"[@{attr}='searchValue']", predicate)
        else:
            # Default XPath template.
            xpath = f"//{item['ele']}{predicate}"

        return xpath, matched_text, score

    def process_config(self, config):
        """
        Process every item of a configuration mapping.
        :param config: mapping of key -> configuration item
        :return: mapping of key -> dict with ``original_config``,
                 ``processed_results`` (filled on success) and, on failure,
                 an ``error`` message
        """
        results = {}

        for key, item in config.items():
            entry = {
                'original_config': item,
                'processed_results': {}
            }
            results[key] = entry

            if not all(name in item for name in self._REQUIRED_KEYS):
                entry['error'] = "Unable to build XPath: missing required configuration"
                continue

            try:
                # build_xpath runs an XPath query itself, so it is guarded too;
                # one bad item must not abort the remaining ones.
                xpath, matched_text, score = self.build_xpath(item)
                if xpath is None:
                    entry['error'] = (
                        "Unable to build XPath: no attribute value similar "
                        "enough to searchValue was found"
                    )
                    continue
                xpath_results = self.root.xpath(xpath)
            except etree.XPathEvalError as e:
                entry['error'] = f"XPath evaluation error: {str(e)}"
                continue

            entry['processed_results'] = {
                'original_value': item['searchValue'],
                'matched_value': matched_text,
                'similarity_score': score,
                'xpath': xpath,
                'results': xpath_results
            }

        return results

# Usage example
def main():
    """Run the handler on a hard-coded sample configuration and print each item's result."""
    sample_items = {
        "0": {
            "path": "//TEXTART[@Text='']/TEXTARTSHAPE/@FontName",
            "ele": "TEXTART",
            "arg": "Text",
            "searchValue": "즐거운컬러푸드영양교실",
            "value": "궁서체",
            "points": 10
        },
        "1": {
            "path": "//PARASHAPE[@Id=//TEXTART[@Text='']/ancestor::P/@ParaShape]/@Align",
            "ele": "PARASHAPE",
            "arg": "Align",
            "searchValue": "Center",
            "value": "Center",
            "points": 2
        }
    }

    hml_file = r"C:\Users\dra\project\HWP-Scoring\output\1.hml"
    outcomes = XMLPathHandler(hml_file).process_config(sample_items)

    # Print the outcome of every configuration item.
    for item_key, outcome in outcomes.items():
        print(f"\nProcessing config item {item_key}:")
        print(f"Original config: {outcome['original_config']}")

        # Failed items carry only an error message; report it and move on.
        if 'error' in outcome:
            print(f"Error: {outcome['error']}")
            continue

        details = outcome['processed_results']
        print(f"Generated XPath: {details['xpath']}")
        print(f"Matched text: {details['matched_value']}")
        print(f"Similarity score: {details['similarity_score']}")
        print(f"Results found: {details['results']}")

# Run the usage example only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
1번문항채점가능 2025-01-08 17:43:24 +09:00			`from lxml import etree`
			`from difflib import SequenceMatcher`
			`import json`

			`class XMLPathHandler:`
			`def __init__(self, xml_file_path):`
			`"""`
			`XML 파일을 로드하고 처리하는 핸들러`
			`:param xml_file_path: XML 파일 경로`
			`"""`
			`self.tree = etree.parse(xml_file_path)`
			`self.root = self.tree.getroot()`

			`def similar(self, a, b):`
			`"""`
			`두 문자열의 유사도를 계산`
			`:return: 유사도 점수 (0~1)`
			`"""`
			`return SequenceMatcher(None, a, b).ratio()`

			`def find_similar_text(self, search_value, element_name, arg_name, threshold=0.8):`
			`"""`
			`XML에서 유사한 텍스트를 찾음`
			`:param search_value: 찾고자 하는 텍스트`
			`:param element_name: 검색할 요소 이름`
			`:param arg_name: 검색할 속성 이름`
			`:param threshold: 유사도 임계값`
			`:return: 가장 유사한 텍스트와 점수`
			`"""`
			`# 특정 요소의 특정 속성을 가진 모든 요소 검색`
			`xpath = f"//{element_name}[@{arg_name}]"`
			`elements = self.root.xpath(xpath)`
			`best_match = None`
			`best_score = 0`

			`for element in elements:`
			`attr_value = element.get(arg_name)`
			`if attr_value is not None:`
			`score = self.similar(search_value, attr_value)`
			`if score > threshold and score > best_score:`
			`best_match = attr_value`
			`best_score = score`

			`return best_match, best_score`

			`def build_xpath(self, item):`
			`"""`
			`설정 항목을 기반으로 XPath 생성`
			`:param item: 설정 항목`
			`:return: 구성된 XPath와 매칭된 텍스트, 유사도 점수`
			`"""`
			`if not all(key in item for key in ['ele', 'arg', 'searchValue']):`
			`return None, None, 0`

			`# 유사 텍스트 검색`
			`matched_text, score = self.find_similar_text(`
			`item['searchValue'],`
			`item['ele'],`
			`item['arg']`
			`)`

			`if matched_text:`
			`# 기본 XPath 템플릿 구성`
			`xpath = f"//{item['ele']}[@{item['arg']}='{matched_text}']"`

			`# path가 제공된 경우, 해당 path를 기반으로 XPath 구성`
			`if 'path' in item and item['path']:`
			`xpath = item['path'].replace(f"[@{item['arg']}='']", f"[@{item['arg']}='{matched_text}']")`
			`xpath = xpath.replace(f"[@{item['arg']}='searchValue']", f"[@{item['arg']}='{matched_text}']")`

			`return xpath, matched_text, score`

			`return None, None, 0`

			`def process_config(self, config):`
			`"""`
			`설정된 JSON 설정을 처리`
			`:param config: JSON 설정`
			`:return: 처리된 XPath 결과들`
			`"""`
			`results = {}`

			`for key, item in config.items():`
			`results[key] = {`
			`'original_config': item,`
			`'processed_results': {}`
			`}`

			`xpath, matched_text, score = self.build_xpath(item)`

			`if xpath:`
			`try:`
			`xpath_results = self.root.xpath(xpath)`
			`results[key]['processed_results'] = {`
			`'original_value': item['searchValue'],`
			`'matched_value': matched_text,`
			`'similarity_score': score,`
			`'xpath': xpath,`
			`'results': xpath_results`
			`}`
			`except etree.XPathEvalError as e:`
			`results[key]['error'] = f"XPath evaluation error: {str(e)}"`
			`else:`
			`results[key]['error'] = "Unable to build XPath: missing required configuration"`

			`return results`

			`# 사용 예시`
			`def main():`
			`config = {`
			`"0": {`
			`"path": "//TEXTART[@Text='']/TEXTARTSHAPE/@FontName",`
			`"ele": "TEXTART",`
			`"arg": "Text",`
			`"searchValue": "즐거운컬러푸드영양교실",`
			`"value": "궁서체",`
			`"points": 10`
			`},`
			`"1": {`
			`"path": "//PARASHAPE[@Id=//TEXTART[@Text='']/ancestor::P/@ParaShape]/@Align",`
			`"ele": "PARASHAPE",`
			`"arg": "Align",`
			`"searchValue": "Center",`
			`"value": "Center",`
			`"points": 2`
			`}`
			`}`

			`xmlPath = r"C:\Users\dra\project\HWP-Scoring\output\1.hml";`
			`handler = XMLPathHandler(xmlPath)`
			`results = handler.process_config(config)`

			`# 결과 출력`
			`for key, result in results.items():`
			`print(f"\nProcessing config item {key}:")`
			`print(f"Original config: {result['original_config']}")`

			`if 'error' in result:`
			`print(f"Error: {result['error']}")`
			`else:`
			`processed = result['processed_results']`
			`print(f"Generated XPath: {processed['xpath']}")`
			`print(f"Matched text: {processed['matched_value']}")`
			`print(f"Similarity score: {processed['similarity_score']}")`
			`print(f"Results found: {processed['results']}")`

			`if __name__ == "__main__":`
			`main()`