丽水查重代码
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

105 lignes
3.7KB

  1. import os
  2. import docx
  3. import requests
  4. import mysql_pool
  5. from pymysql.converters import escape_string
  def _parse_paragraphs(paragraphs):
      """Scan document paragraphs for the project name and the feature list.

      Returns a ``(xmmc, feature_map, content)`` tuple:

      * ``xmmc`` - text of the last non-heading paragraph seen between a
        paragraph containing '项目名称' and one containing '项目类型'.
      * ``feature_map`` - maps each heading found between the
        '3.1.2 建设内容' paragraph and the '3.2 整体架构设计' paragraph to the
        first non-heading paragraph that follows it.
      * ``content`` - every non-heading paragraph text of that section, in
        document order.
      """
      in_project_name = False
      in_features = False
      content = []
      feature_map = {}
      feature_name = ""
      xmmc = ""
      for para in paragraphs:
          style = str(para.style.name)
          text = para.text
          print(f"style: {para.style.name}, value: {para.text}")
          # Skip table-of-contents entries. The previous check compared
          # find('toc') with 1, which never matched a style name that
          # starts with "toc", so such entries were parsed as body text.
          if 'toc' in style:
              continue
          # Track whether we are inside the "project name" part of the form.
          if '项目名称' in text:
              in_project_name = True
          elif '项目类型' in text:
              in_project_name = False
          # 'toc' styles were already skipped above, so only headings need
          # to be excluded here.
          if in_project_name and 'Heading' not in style:
              xmmc = text
          # Track whether we are inside the "construction content" section.
          if text == '3.1.2 建设内容':
              in_features = True
          elif text == '3.2 整体架构设计':
              in_features = False
          if not in_features:
              continue
          if 'Heading' in style:
              # A heading starts a new feature; its description follows.
              feature_map[text] = ""
              feature_name = text
          else:
              feature_map[feature_name] = text
              # Reset the feature name: only the first body paragraph after
              # a heading is attributed to it. Later paragraphs are stored
              # under the empty key, which is never written to the database.
              feature_name = ""
              content.append(text)
      return xmmc, feature_map, content


  def read_docx(file_path):
      """Extract the '3.1.2 建设内容' section of a .docx file and store it.

      The document at ``file_path`` is opened with python-docx. Every
      heading inside the section, together with its first description
      paragraph, is inserted as one row into ``user_history_module_data``.

      Args:
          file_path: Path of the .docx file to read.

      Returns:
          The non-heading paragraphs of the section joined with newlines.

      Raises:
          Whatever ``docx.Document`` or the database helper raises; errors
          are not caught here.
      """
      mysql = mysql_pool.ConnMysql()
      try:
          doc = docx.Document(file_path)
          xmmc, feature_map, content = _parse_paragraphs(doc.paragraphs)
          for key, value in feature_map.items():
              # The section heading itself and the empty key are bookkeeping
              # entries, not features.
              if key in ("3.1.2 建设内容", ""):
                  continue
              print(f"Key: {key}, Value: {value}")
              # Persist the feature description. Values are escaped because
              # the statement is assembled as a string.
              # NOTE(review): prefer a parameterized query if
              # sql_change_msg supports bound parameters - confirm.
              mysql.sql_change_msg(
                  """insert into user_history_module_data(xmmc,gnmc,gnms,line, remark) value("%s", "%s", "%s", "%s", "%s")""" % (
                      escape_string(xmmc), escape_string(key), escape_string(value), "", "自动拆解导入"))
      finally:
          # The message says "delete file", but nothing is removed here; the
          # file removal that used to sit next to it is disabled.
          print("删除文件")
      return "\n".join(content)
  def convert_doc_to_docx(doc_file, docx_file):
      """Write the contents of a ``.doc`` file into a new ``.docx`` file.

      When ``doc_file`` ends with '.doc', its raw bytes are read, added as a
      single paragraph to a fresh python-docx document, and saved to
      ``docx_file``. For any other file name nothing is written.

      ``doc_file`` is always deleted afterwards, whether or not a document
      was written and even if an error occurred.

      NOTE(review): the binary content is passed to ``add_paragraph``
      unchanged, so this does not look like a real format conversion -
      confirm whether an external converter should be used instead.
      """
      try:
          if not doc_file.endswith('.doc'):
              return
          # Create the empty target document first.
          target = docx.Document()
          # Read the source file as raw bytes.
          with open(doc_file, 'rb') as source:
              raw = source.read()
          # Put the bytes into one paragraph and save the result.
          target.add_paragraph(raw)
          target.save(docx_file)
      finally:
          os.remove(doc_file)
  86. # file_path = "丽水市本级信息化项目建设方案模板.docx"
  87. # doc_content = read_docx()
  88. # print(doc_content)