99 lines
3.6 KiB
Python
99 lines
3.6 KiB
Python
|
||
from docx import Document
|
||
from openpyxl import load_workbook
|
||
import re
|
||
|
||
|
||
# Default question category, written to column B when the caller passes no
# explicit ``field``.
QUES_CLASS = '安全领域'

# One entry per answer option: a regex whose first group captures the text
# following the option label ("A:" .. "F:"), paired with the worksheet column
# letter that option is stored in.
OPTION_LIST = [
    [r"A:\s*(\S+)", "D"],
    [r"B:\s*(\S+)", "E"],
    [r"C:\s*(\S+)", "F"],
    [r"D:\s*(\S+)", "G"],
    [r"E:\s*(\S+)", "H"],
    [r"F:\s*(\S+)", "I"],
]
|
||
|
||
def fill_excel(matches, excel_path, local):
    """Write ``matches`` into cell ``local`` of the workbook's active sheet.

    Args:
        matches: Value to store.  A falsy value (``''``, ``None``, ``0``)
            means there is nothing to write and the file is left untouched.
        excel_path: Path of the ``.xlsx`` file; it is loaded and then saved
            back to the same path.
        local: Cell coordinate such as ``'A3'``.
    """
    # Nothing to write: skip the load/save round-trip entirely rather than
    # parsing (and possibly re-saving) an unmodified workbook.
    if not matches:
        return
    wb = load_workbook(excel_path)
    ws = wb.active
    ws[local] = matches
    wb.save(excel_path)
|
||
|
||
|
||
def match_text(text, pattern):
    """Return the first capture group of the first ``pattern`` match in ``text``.

    The search is unanchored (``re.search``).  An empty string is returned
    when ``pattern`` does not match anywhere in ``text``.
    """
    found = re.search(pattern, text)
    return found.group(1) if found else ''
|
||
|
||
# --- Word document parsing -------------------------------------------------

# Trailing score marker on a question header, e.g. "(3分)" or "(10分)".
_SCORE_SUFFIX_RE = re.compile(r'[((]\d+(?:\.\d+)?分[))]$')
# Ordinal in front of the question text, e.g. "12、".
_ORDINAL_PREFIX_RE = re.compile(r'^\d+、')
# Captures the token following the "correct answer" label.
_ANSWER_PATTERN = r"正确答案:\s*(\S+)"


def _parse_question_header(text):
    """Return ``(type, question)`` for a ``【xxx】…`` header, else ``None``.

    ``question`` is ``''`` when the paragraph does not end in a score marker.
    """
    if text[:1] != '【' or text[4:5] != '】':
        return None
    q_type = text[1:3]
    # A header is at least five characters long, so text[-2] is always valid.
    if text[-2] != '分':
        return q_type, ''
    score = _SCORE_SUFFIX_RE.search(text)
    # Cut exactly the score marker when it is recognised (any number of
    # digits); otherwise fall back to dropping the last four characters.
    body = text[5:score.start()] if score else text[5:-4]
    # Anchored, single substitution: only the leading ordinal is removed,
    # never a "<digits>、" sequence inside the question itself.
    return q_type, _ORDINAL_PREFIX_RE.sub('', body.strip(), count=1)


def _collect_questions(texts):
    """Group paragraph texts into one ``{column letter: value}`` dict per question."""
    rows = []
    for text in texts:
        header = _parse_question_header(text)
        if header is not None:
            q_type, question = header
            rows.append({'A': q_type, 'C': question})
        if not rows:
            # Content before the first question header belongs to no question.
            continue
        current = rows[-1]
        answer = match_text(text, _ANSWER_PATTERN)
        if answer:
            # First match wins; later paragraphs of the same question
            # cannot overwrite it.
            current.setdefault('J', answer)
        for pattern, column in OPTION_LIST:
            option = match_text(text, pattern)
            if option:
                current.setdefault(column, option)
    return rows


def interpret_text(start: int, excel_path: str, doc_path: str, field=None):
    """Copy the questions found in a Word document into an Excel sheet.

    A paragraph starting with a ``【xxx】`` tag opens a new question.  Option
    (``A:`` .. ``F:``) and ``正确答案:`` matches found in that paragraph or in
    the paragraphs after it are attached to that question, so every question
    occupies exactly one row even when questions have different numbers of
    options or lack an answer.  Matches that appear before the first question
    header are ignored.

    Columns written per row: A question type, B category, C question text,
    D-I options A-F, J correct answer.  Empty values are not written, so
    existing cell contents are left as they are.

    Args:
        start: Worksheet row number that receives the first question.
        excel_path: Path of the ``.xlsx`` file, updated in place.
        doc_path: Path of the ``.docx`` file to read.
        field: Category for column B; ``QUES_CLASS`` is used when falsy.

    Returns:
        The string ``'OK'``.
    """
    wordfile = Document(doc_path)
    rows = _collect_questions(p.text for p in wordfile.paragraphs)

    if rows:
        # Load and save the workbook once for the whole batch instead of
        # once per cell.
        wb = load_workbook(excel_path)
        ws = wb.active
        category = field or QUES_CLASS
        for offset, row in enumerate(rows):
            row_no = str(start + offset)
            row['B'] = category
            for column, value in row.items():
                if value:
                    ws[column + row_no] = value
        wb.save(excel_path)
    return 'OK'
|
||
|
||
|
||
if __name__ == '__main__':
    # Raw strings: Windows paths contain backslashes that must stay literal
    # instead of being read as (invalid) escape sequences like "\c" or "\q".
    doc_path = r"C:\code\data\test.docx"
    excel_path = r"C:\code\data\question.xlsx"
    interpret_text(3, excel_path, doc_path)