1번문항채점가능

This commit is contained in:
devdra9
2025-01-08 17:43:24 +09:00
parent ec3a3ba833
commit a4b6e22e4f
80 changed files with 1219 additions and 56 deletions

50
score4.py Normal file
View File

@@ -0,0 +1,50 @@
from lxml import etree
from difflib import SequenceMatcher
def get_all_text_xml_file(root):
    """Return the text nodes of all ``CHAR`` elements found via *root*.

    Runs the XPath query ``//CHAR/text()`` through ``root.xpath`` and
    prints how many results it produced before returning them.

    Args:
        root: An object exposing an ``xpath(query)`` method (the script
            passes the root element of a parsed lxml tree).

    Returns:
        Whatever ``root.xpath`` returns for the query, unchanged.
    """
    # A broader query, "//CHAR/text() | //TEXTART/@Text", was tried
    # earlier; only CHAR text nodes are collected now.
    char_texts = root.xpath("//CHAR/text()")
    count = len(char_texts)
    print(f'all_text length: {count}')
    return char_texts
def _has_close_match(word, candidates, threshold):
    """Return True if *word* is at least *threshold*-similar to a candidate."""
    for candidate in candidates:
        matcher = SequenceMatcher(None, word, candidate)
        # real_quick_ratio() and quick_ratio() are cheap upper bounds on
        # ratio(), so they can only rule a candidate out, never in.
        if (matcher.real_quick_ratio() >= threshold
                and matcher.quick_ratio() >= threshold
                and matcher.ratio() >= threshold):
            return True
    return False


def find_typos_and_spaces(original: list, target: list, *,
                          threshold: float = 0.9) -> tuple:
    """Find words and spacing in *original* that deviate from *target*.

    Typo check: every whitespace-separated word of every original text is
    compared with the whitespace-separated words of the target texts. A
    word is reported when no target word reaches a ``SequenceMatcher``
    ratio of at least *threshold*.

    Space check: texts are paired positionally with ``zip`` (extra texts
    on the longer side are ignored). Both texts of a pair are cut to their
    common length and their space counts compared. The summed absolute
    difference is printed as ``space_differences``.

    Args:
        original: Sequence of text strings to be checked.
        target: Sequence of reference text strings.
        threshold: Minimum similarity ratio (0.0-1.0) for a word to count
            as matching a target word. Defaults to 0.9.

    Returns:
        A ``(typos, spaces)`` tuple. ``typos`` lists the unmatched words in
        order of appearance. ``spaces`` lists the original texts whose
        space count differs from the paired target text.
    """
    # Compare word against word: split the target texts the same way the
    # original texts are split. dict.fromkeys de-duplicates while keeping
    # a deterministic order and O(1) membership tests.
    target_words = dict.fromkeys(
        word for text in target for word in text.split()
    )

    typos = []
    for text in original:
        for word in text.split():
            # An exact hit has ratio 1.0; skip the fuzzy comparison.
            if word in target_words:
                continue
            if not _has_close_match(word, target_words, threshold):
                typos.append(word)

    # The space check runs once over the paired texts, independent of the
    # typo loop above.
    spaces = []
    space_differences = 0
    for orig_text, targ_text in zip(original, target):
        common_length = min(len(orig_text), len(targ_text))
        orig_spaces = orig_text[:common_length].count(' ')
        targ_spaces = targ_text[:common_length].count(' ')
        difference = abs(orig_spaces - targ_spaces)
        if difference:
            spaces.append(orig_text)
            space_differences += difference
    print(f'space_differences : {space_differences}')
    return typos, spaces
# Paths of the two .hml files that are compared with each other.
original_file = r"C:\Users\dra\project\HWP-Scoring\output\워드(한글)-010128-윤빈.hml"
target_file = r"C:\Users\dra\project\HWP-Scoring\output\워드(한글)-009939-이준.hml"
# Alternative target used earlier:
# target_file = r"C:\Users\dra\project\HWP-Scoring\output\원본 copy.hml"

# Parse the first file and collect its text.
tree = etree.parse(original_file)
original_text = get_all_text_xml_file(tree.getroot())

# Parse the second file the same way; `tree` and `root` end up bound to it.
tree = etree.parse(target_file)
root = tree.getroot()
target_text = get_all_text_xml_file(root)

# Compare the two text lists.
typos, spaces = find_typos_and_spaces(original_text, target_text)
# print(f'Typos: {typos}')
# print(f'Spaces: {spaces}')