|
- import os
- import docx
- import requests
- import mysql_pool
- from pymysql.converters import escape_string
-
-
def read_docx(file_path):
    """Parse a .docx file, store its feature descriptions, and return the section text.

    The document is opened with ``docx.Document(file_path)`` and its
    paragraphs are scanned once, in order:

    * Paragraphs whose style name contains ``'toc'`` are skipped.
    * The project name (``xmmc``) is the last non-heading paragraph seen
      from the first paragraph containing ``'项目名称'`` up to (not
      including) the paragraph containing ``'项目类型'``.
    * The content section starts at the paragraph whose text is exactly
      ``'3.1.2 建设内容'`` and ends just before the paragraph whose text is
      exactly ``'3.2 整体架构设计'``. Inside it, every heading-styled
      paragraph opens a feature, and the first non-heading paragraph that
      follows becomes that feature's description.

    Each collected feature is inserted into ``user_history_module_data``
    through ``mysql.sql_change_msg``.

    Args:
        file_path: Path (or any object accepted by ``docx.Document``) of
            the .docx file to read.

    Returns:
        The text of all non-heading paragraphs inside the content section,
        joined with newlines.

    Raises:
        Whatever ``docx.Document`` or ``sql_change_msg`` raise; nothing is
        caught here.
    """
    mysql = mysql_pool.ConnMysql()
    try:
        doc = docx.Document(file_path)
        # True while the scan is inside the "3.1.2 建设内容" section.
        in_content_section = False
        # True while the scan is between "项目名称" and "项目类型".
        in_project_name = False
        content = []
        # Heading text -> first body paragraph that follows it.
        feature_map = {}
        # Heading currently waiting for its description ("" when none).
        feature_name = ""
        # Project name.
        xmmc = ""
        for para in doc.paragraphs:
            text = para.text
            style_name = str(para.style.name)
            print(f"style: {para.style.name}, value: {para.text}")
            # Skip table-of-contents entries. The previous test was
            # ``find('toc') == 1``, which only matched when "toc" started
            # at index 1 of the style name, so it never matched a style
            # name that begins with "toc".
            if 'toc' in style_name:
                continue
            is_heading = 'Heading' in style_name

            # Track the project-name range and remember its body text.
            if '项目名称' in text:
                in_project_name = True
            elif '项目类型' in text:
                in_project_name = False
            if in_project_name and not is_heading:
                xmmc = text

            # Track the content section boundaries (exact text match).
            if text == '3.1.2 建设内容':
                in_content_section = True
            elif text == '3.2 整体架构设计':
                in_content_section = False
            if not in_content_section:
                continue

            if is_heading:
                feature_map[text] = ""
                feature_name = text
            else:
                # Only the first body paragraph after a heading is kept as
                # its description: feature_name is reset here, so further
                # body paragraphs land under the "" key, which is filtered
                # out before the insert below. They are still part of the
                # returned text.
                # NOTE(review): confirm that dropping the later paragraphs
                # from the stored description is intended.
                feature_map[feature_name] = text
                feature_name = ""
                content.append(text)

        # Persist every real feature; the section title itself and the
        # "" placeholder key are not features.
        for key, value in feature_map.items():
            if key != "3.1.2 建设内容" and key != "":
                print(f"Key: {key}, Value: {value}")
                # NOTE(review): the statement is assembled by string
                # formatting and relies on escape_string for safety; switch
                # to bound parameters if sql_change_msg supports them.
                mysql.sql_change_msg(
                    """insert into user_history_module_data(xmmc,gnmc,gnms,line, remark) value("%s", "%s", "%s", "%s", "%s")""" % (
                        escape_string(xmmc), escape_string(key), escape_string(value), "", "自动拆解导入"))
    finally:
        # No file is removed here; only the log line is emitted.
        print("删除文件")

    return "\n".join(content)
-
-
def convert_doc_to_docx(doc_file, docx_file):
    """Write the contents of a ``.doc`` file into a new ``.docx`` and delete the source.

    When ``doc_file`` does not end with ``'.doc'`` the function does
    nothing and leaves the file in place. Previously such a file was still
    deleted by the ``finally`` clause even though nothing had been
    converted.

    For a ``.doc`` path, the source file is removed whether or not the
    conversion succeeds.

    Args:
        doc_file: Path of the source file; must be a ``str`` ending in
            ``'.doc'`` for any work to happen.
        docx_file: Destination path passed to ``Document.save``.

    Returns:
        None.

    Raises:
        Whatever ``open``, ``add_paragraph``, ``save`` or ``os.remove``
        raise; nothing is caught here.
    """
    if not doc_file.endswith('.doc'):
        return

    try:
        # Start from an empty .docx document.
        docx_document = docx.Document()

        # Read the source file as raw bytes.
        with open(doc_file, 'rb') as doc:
            content = doc.read()

        # NOTE(review): the raw bytes are handed to add_paragraph as-is.
        # This does not parse the .doc format, so the result is unlikely
        # to be a faithful conversion - confirm whether a real converter
        # should be used instead.
        docx_document.add_paragraph(content)

        docx_document.save(docx_file)
    finally:
        # The source is always cleaned up once a conversion was attempted.
        os.remove(doc_file)
-
-
- # file_path = "丽水市本级信息化项目建设方案模板.docx"
- # doc_content = read_docx()
- # print(doc_content)
|