"""Compare the body text of two HWP (Hangul Word Processor 5.x) documents.

The script extracts the paragraph text of each document and reports how many
lines differ between them.
"""

import difflib
import struct
import sys
import zlib

# Paths compared when no files are supplied by the caller or the command line.
DEFAULT_FILE1 = r'C:\Users\dra\project\HWP-Scoring\input\원본.hwp'
DEFAULT_FILE2 = r'C:\Users\dra\project\HWP-Scoring\input\원본 copy.hwp'

# Record tag that carries paragraph text in a BodyText section
# (HWPTAG_BEGIN 0x10 + 51 in the HWP 5.0 format specification).
_HWPTAG_PARA_TEXT = 0x10 + 51

# Control codes (< 32) that occupy a single UTF-16 code unit. Every other
# control code is an inline/extended control that occupies 8 code units.
_SINGLE_UNIT_CONTROLS = frozenset({0, 10, 13, 24, 25, 26, 27, 28, 29, 30, 31})
_WIDE_CONTROL_UNITS = 8

# Control codes that have a plain-text equivalent; all others are dropped.
_CONTROL_REPLACEMENTS = {
    9: 0x09,   # tab
    10: 0x0A,  # forced line break
    24: 0x2D,  # hyphen
    30: 0x20,  # non-breaking space
    31: 0x20,  # fixed-width space
}


def _is_compressed(ole):
    """Return True when the FileHeader stream flags the body as compressed."""
    if not ole.exists('FileHeader'):
        return False
    header = ole.openstream('FileHeader').read()
    # Layout: 32-byte signature, 4-byte version, then the 4-byte property
    # flags whose lowest bit is the "compressed" flag.
    if len(header) < 40:
        return False
    (flags,) = struct.unpack_from('<I', header, 36)
    return bool(flags & 0x01)


def _iter_records(data):
    """Yield ``(tag_id, payload)`` for each record of a decoded section."""
    pos = 0
    end = len(data)
    while pos + 4 <= end:
        (header,) = struct.unpack_from('<I', data, pos)
        pos += 4
        tag_id = header & 0x3FF
        size = (header >> 20) & 0xFFF
        if size == 0xFFF:
            # A size field of 0xFFF means the real size follows as a DWORD.
            if pos + 4 > end:
                break
            (size,) = struct.unpack_from('<I', data, pos)
            pos += 4
        yield tag_id, data[pos:pos + size]
        pos += size


def _decode_paragraph(payload):
    """Decode one paragraph-text payload into plain text without controls."""
    unit_count = len(payload) // 2
    units = struct.unpack_from('<%dH' % unit_count, payload)
    kept = []
    index = 0
    while index < unit_count:
        code = units[index]
        if code >= 32:
            kept.append(code)
            index += 1
            continue
        replacement = _CONTROL_REPLACEMENTS.get(code)
        if replacement is not None:
            kept.append(replacement)
        # Skip the whole control so its binary parameters are not read as text.
        index += 1 if code in _SINGLE_UNIT_CONTROLS else _WIDE_CONTROL_UNITS
    packed = struct.pack('<%dH' % len(kept), *kept)
    return packed.decode('utf-16le', errors='ignore')


def _section_text(data):
    """Return the paragraphs of one decoded section, one per line."""
    return '\n'.join(
        _decode_paragraph(payload)
        for tag_id, payload in _iter_records(data)
        if tag_id == _HWPTAG_PARA_TEXT
    )


def extract_text_from_hwp(file_path):
    """Extract the body text from an HWP file.

    Every consecutive ``BodyText/SectionN`` stream is read, decompressed when
    the file header says the body is compressed, and reduced to its
    paragraph-text records. Paragraphs are separated by newlines.

    Args:
        file_path: Path of the HWP (OLE compound) file to read.

    Returns:
        The extracted text, or ``''`` when the file has no
        ``BodyText/Section0`` stream or cannot be read (a message is printed
        in both cases).

    Raises:
        ImportError: If the third-party ``olefile`` package is missing.
    """
    # Imported here so the text-diff helpers in this module stay importable
    # without the third-party dependency; a missing package still fails
    # loudly instead of being reported as an extraction error.
    import olefile

    try:
        # The context manager closes the OLE file even when parsing fails.
        with olefile.OleFileIO(file_path) as ole:
            if not ole.exists('BodyText/Section0'):
                print(f"No 'BodyText/Section0' stream found in {file_path}")
                return ''

            compressed = _is_compressed(ole)
            sections = []
            index = 0
            while ole.exists(f'BodyText/Section{index}'):
                raw = ole.openstream(f'BodyText/Section{index}').read()
                if compressed:
                    # Body streams are raw deflate data without a zlib header.
                    raw = zlib.decompress(raw, -15)
                sections.append(_section_text(raw))
                index += 1
            return '\n'.join(sections)
    except Exception as e:
        # Best-effort: report the problem and let the caller see "no text".
        print(f"Error extracting text from {file_path}: {e}")
        return ''


def compare_texts(text1, text2):
    """Count the lines that differ between two texts.

    Both texts are split into lines and compared with ``difflib.ndiff``.
    Each line present on only one side counts once, so a changed line
    contributes two (one removal plus one addition).

    Args:
        text1: The first text.
        text2: The second text.

    Returns:
        The number of added plus removed lines.
    """
    diff = difflib.ndiff(text1.splitlines(), text2.splitlines())
    # ndiff also emits '  ' (unchanged) and '? ' (hint) lines; ignore those.
    return sum(1 for line in diff if line.startswith(('+ ', '- ')))


def main(file1=DEFAULT_FILE1, file2=DEFAULT_FILE2):
    """Print the number of differing lines between two HWP documents.

    Args:
        file1: Path of the first document.
        file2: Path of the second document.
    """
    text1 = extract_text_from_hwp(file1)
    text2 = extract_text_from_hwp(file2)

    if not text1 or not text2:
        print("텍스트를 추출하지 못했습니다.")
        return

    diff_count = compare_texts(text1, text2)
    print(f"차이점 개수: {diff_count}")


if __name__ == "__main__":
    # Up to two optional command-line paths override the default files.
    main(*sys.argv[1:3])