1번문항채점가능
This commit is contained in:
50
score4.py
Normal file
50
score4.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from lxml import etree
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
def get_all_text_xml_file(root):
    """Collect the text nodes of every ``CHAR`` element in the document.

    Args:
        root: An element exposing an ``xpath`` method (the caller passes the
            root element of a tree parsed with ``lxml.etree``).

    Returns:
        The list produced by the XPath query ``//CHAR/text()``, i.e. one
        string per text node found under a ``CHAR`` element, in the order
        the query returns them.

    Side effects:
        Prints the number of collected text nodes to stdout.
    """
    # Alternative query that additionally picks up the ``Text`` attribute of
    # TEXTART elements (currently disabled):
    # all_text = root.xpath("//CHAR/text() | //TEXTART/@Text")
    all_text = root.xpath("//CHAR/text()")
    print(f'all_text length: {len(all_text)}')
    return all_text
|
||||
|
||||
def find_typos_and_spaces(original, target):
    """Compare two lists of text runs for likely typos and spacing differences.

    Args:
        original: Sequence of strings (text runs) to be checked. It is
            iterated more than once, so pass a list rather than a generator.
        target: Sequence of strings (text runs) used as the reference. Also
            iterated more than once.

    Returns:
        A ``(typos, spaces)`` tuple.

        ``typos`` lists, in order of appearance, every whitespace-separated
        word of ``original`` for which no word of ``target`` reaches a
        ``SequenceMatcher`` similarity ratio of at least 0.9.

        ``spaces`` lists ``(index, original_count, target_count)`` for every
        positionally aligned pair of runs whose number of ``' '`` characters
        differs. Both runs are cut to the length of the shorter one before
        counting, and pairing stops at the shorter of the two sequences.

    Side effects:
        Prints the summed absolute space-count difference to stdout.
    """
    similarity_threshold = 0.9

    # Build the reference vocabulary from individual target words. Comparing
    # an original word against a whole multi-word run would almost never
    # reach the threshold and would flag correctly typed words as typos.
    target_vocab = set()
    target_words = []
    for text in target:
        for word in text.split():
            if word not in target_vocab:
                target_vocab.add(word)
                target_words.append(word)

    # Typo check. Verdicts are cached per distinct word so repeated words do
    # not trigger repeated fuzzy comparisons.
    typos = []
    is_typo = {}
    for text in original:
        for word in text.split():
            if word not in is_typo:
                # Exact hit is the cheap common case; fall back to fuzzy
                # matching only when the word is not in the vocabulary.
                is_typo[word] = word not in target_vocab and not any(
                    SequenceMatcher(None, word, candidate).ratio()
                    >= similarity_threshold
                    for candidate in target_words
                )
            if is_typo[word]:
                typos.append(word)

    # Space-difference check on positionally aligned runs.
    spaces = []
    space_differences = 0
    for index, (orig_text, targ_text) in enumerate(zip(original, target)):
        # Only the common-length prefix of the two runs is compared.
        min_length = min(len(orig_text), len(targ_text))
        orig_spaces = orig_text[:min_length].count(' ')
        targ_spaces = targ_text[:min_length].count(' ')
        if orig_spaces != targ_spaces:
            spaces.append((index, orig_spaces, targ_spaces))
            space_differences += abs(orig_spaces - targ_spaces)

    print(f'space_differences : {space_differences}')

    return typos, spaces
|
||||
|
||||
# Parse the two XML (.hml) files and compare their text content.
original_file = r"C:\Users\dra\project\HWP-Scoring\output\워드(한글)-010128-윤빈.hml"
target_file = r"C:\Users\dra\project\HWP-Scoring\output\워드(한글)-009939-이준.hml"
# Alternative reference file (currently disabled):
# target_file = r"C:\Users\dra\project\HWP-Scoring\output\원본 copy.hml"

# Text runs of the file being checked.
tree = etree.parse(original_file)
root = tree.getroot()
original_text = get_all_text_xml_file(root)

# Text runs of the reference file; `tree` and `root` are rebound here.
tree = etree.parse(target_file)
root = tree.getroot()
target_text = get_all_text_xml_file(root)

typos, spaces = find_typos_and_spaces(original_text, target_text)
# Detailed output is disabled; only the summaries printed by the helpers
# above are shown.
# print(f'Typos: {typos}')
# print(f'Spaces: {spaces}')
|
||||
Reference in New Issue
Block a user