1번문항채점가능
This commit is contained in:
47
diff_hwp.py
Normal file
47
diff_hwp.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import olefile
|
||||
import difflib
|
||||
|
||||
def extract_text_from_hwp(file_path):
    """
    Extract text from the first body section of an HWP file.

    Opens ``file_path`` as an OLE compound file, reads the
    ``BodyText/Section0`` stream and decodes its raw bytes as UTF-16LE,
    silently dropping byte sequences that cannot be decoded.

    Args:
        file_path: Path of the HWP file to read.

    Returns:
        The decoded text, or ``''`` when the stream is missing or any
        error occurs. A diagnostic message is printed in both cases.
    """
    try:
        ole = olefile.OleFileIO(file_path)
        try:
            if not ole.exists('BodyText/Section0'):
                print(f"No 'BodyText/Section0' stream found in {file_path}")
                return ''
            content = ole.openstream('BodyText/Section0').read()
        finally:
            # Release the underlying file handle on every path; without
            # this the file stays open (and locked on Windows).
            ole.close()
        # NOTE(review): only Section0 is read, and the bytes are decoded
        # as-is. HWP body streams are presumably often deflate-compressed
        # and record-structured -- confirm the inputs are plain UTF-16LE.
        return content.decode('utf-16le', errors='ignore')
    except Exception as e:
        # Best-effort by design: report the problem and signal failure
        # with an empty string instead of propagating.
        print(f"Error extracting text from {file_path}: {e}")
        return ''
|
||||
|
||||
def compare_texts(text1, text2):
    """
    Count the differing lines between two texts.

    Both inputs are split into lines and compared with ``difflib.ndiff``.
    Every delta line marked as an addition (``'+ '``) or a removal
    (``'- '``) contributes one to the returned count; unchanged lines
    and ``'? '`` hint lines are not counted.
    """
    changed_markers = ('+ ', '- ')
    delta = difflib.ndiff(text1.splitlines(), text2.splitlines())
    return sum(1 for entry in delta if entry.startswith(changed_markers))
|
||||
|
||||
def main(file1=None, file2=None):
    """
    Compare the text of two HWP files and print the difference count.

    Args:
        file1: Path of the first HWP file. When omitted, the original
            hard-coded sample path is used.
        file2: Path of the second HWP file. When omitted, the original
            hard-coded sample path is used.

    Prints a failure message and returns early when either file yields
    no text; otherwise prints the number of differing lines.
    """
    # Fall back to the historical defaults so a bare main() call behaves
    # exactly as before.
    if file1 is None:
        file1 = r'C:\Users\dra\project\HWP-Scoring\input\원본.hwp'
    if file2 is None:
        file2 = r'C:\Users\dra\project\HWP-Scoring\input\원본 copy.hwp'

    text1 = extract_text_from_hwp(file1)
    text2 = extract_text_from_hwp(file2)

    # An empty string is the extractor's failure signal.
    if not text1 or not text2:
        print("텍스트를 추출하지 못했습니다.")
        return

    diff_count = compare_texts(text1, text2)
    print(f"차이점 개수: {diff_count}")
|
||||
|
||||
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user