You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

284 lines
12KB

  1. # coding=utf-8
  2. import re
  3. import mysql_pool
  4. from pymysql.converters import escape_string
  5. import cosin_similarity
  6. import pandas as pd
  7. import datetime
  8. import requests
  9. import os
  10. wdys1 = {
  11. "项目名称": "xmmc",
  12. "现状问题": "xzwt",
  13. "系统基础": "xtjc",
  14. "项目目标": "xmmb",
  15. "预期绩效": "yqjx",
  16. "建设需求": "jsxq",
  17. "数据需求": "sjxq",
  18. "安全需求": "aqxq",
  19. "业务领域": "ywly",
  20. "核心业务": "hxyw",
  21. "业务需求": "ywxq",
  22. "业务协同": "ywxt",
  23. "建设层级": "jscj",
  24. "用户范围": "yhfw",
  25. "目标群体": "mbqt",
  26. "建设内容": "jsnr",
  27. "功能模块": "gnmk",
  28. "数据共享": "sjgx",
  29. "智能要素": "znys",
  30. "申报单位": "sbdw",
  31. "所属地区": "ssdq",
  32. "预算年度": "ysnd"
  33. }
  34. wdys2 = {
  35. "xmmc": "项目名称",
  36. "xzwt": "现状问题",
  37. "xtjc": "系统基础",
  38. "xmmb": "项目目标",
  39. "yqjx": "预期绩效",
  40. "jsxq": "建设需求",
  41. "sjxq": "数据需求",
  42. "aqxq": "安全需求",
  43. "ywly": "业务领域",
  44. "hxyw": "核心业务",
  45. "ywxq": "业务需求",
  46. "ywxt": "业务协同",
  47. "jscj": "建设层级",
  48. "yhfw": "用户范围",
  49. "mbqt": "目标群体",
  50. "jsnr": "建设内容",
  51. "gnmk": "功能模块",
  52. "sjgx": "数据共享",
  53. "znys": "智能要素",
  54. "sbdw": "申报单位",
  55. "ssdq": "所属地区",
  56. "ysnd": "预算年度"
  57. }
  58. gnmkys = {
  59. "gnmc": "功能名称",
  60. "gnms": "功能描述"
  61. }
  62. def getFlag():
  63. data_dict = {}
  64. df = pd.read_excel("0825-丽水系统查重维度.xlsx")
  65. data = df.values
  66. data = list(pd.Series(data[:, 1]).dropna())
  67. for d in data:
  68. try:
  69. wd = re.search("(.*?)(.*?%)", d).group(1).strip()
  70. wdc = wdys1.get(wd)
  71. if wdc:
  72. qz = re.search(".*?((.*?%))", d).group(1)
  73. data_dict[wdc] = qz
  74. except:
  75. pass
  76. return data_dict
  77. def gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title):
  78. # 将excel文件中的所有第三维度内容进行拼接
  79. str_dict = {}
  80. for et in er_title:
  81. for d in data:
  82. if d[1] == et:
  83. if str_dict.get(et):
  84. str_dict[et] = str_dict.get(et) + d[3]
  85. else:
  86. str_dict[et] = d[3]
  87. for k, v in str_dict.items():
  88. mysql.sql_change_msg("""insert into user_history_module_data_total(xmmc,gnmc,gnms) value("%s", "%s", "%s")""" % (
  89. escape_string(xmmc), escape_string(k), escape_string(v)))
  90. #
  91. # similarity = cosin_similarity.CosineSimilarity(v, v)
  92. # similarity, keywords_x, keywords_y = similarity.main()
  93. # mysql.sql_change_msg("""insert into user_history_module_keywords (xmmc,gnmc,gnms) value("%s" ,"%s", "%s")""" % (
  94. # xmmc, escape_string(k), str(keywords_y)[dup_file_test:-dup_file_test]))
  95. def project_check(data_list):
  96. mysql = mysql_pool.ConnMysql()
  97. # 读取维度和权重
  98. # get_data_dict = getFlag()
  99. # 遍历excel存储路径
  100. for dl in data_list:
  101. # path = "0825-丽水系统查重维度1.xlsx"
  102. # 读取路径下的excel
  103. print(dl)
  104. df = pd.read_excel(dl[1])
  105. xmmc = df.keys()
  106. # print(type(xmmc[dup_file_test]))
  107. xmmc=xmmc[1]
  108. # print(type(xmmc))
  109. # xmmc1=''
  110. if "可研报告"or "可研性报告"or "可行性研究报告" in xmmc:
  111. xmmc=xmmc.replace('可研报告','')
  112. xmmc=xmmc.replace('可研性报告','')
  113. xmmc=xmmc.replace('可行性研究报告','')
  114. # print(xmmc)
  115. data = df.values
  116. # 将excel文件中的所有维度内容进行拼接
  117. join_str = ""
  118. str_dict = {}
  119. title = ""
  120. er_title = set()
  121. # for d in data:
  122. # # print(d)
  123. # if pd.notnull(d[0]):
  124. # title = d[0]
  125. # if title == "功能模块":
  126. # er_title.add(d[dup_file_test])
  127. # join_str = ""
  128. # for i in d[dup_file_test:]:
  129. # if pd.notnull(i):
  130. # join_str += i
  131. # str_dict[wdys1.get(title)] = join_str
  132. # else:
  133. # if title == "功能模块":
  134. # er_title.add(d[dup_file_test])
  135. # for i in d[dup_file_test:]:
  136. # if pd.notnull(i):
  137. # join_str += i
  138. # str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
  139. # print(str_dict)
  140. gnmk_str = []
  141. # print(data)
  142. for d in data:
  143. if pd.notnull(d[0]):
  144. title = d[0]
  145. if title == "功能模块":
  146. er_title.add(d[1])
  147. join_str = ""
  148. for i in d[1:]:
  149. # print(type(i))
  150. # i=str(i)
  151. if pd.notnull(i):
  152. join_str += str(i)
  153. if title == "功能模块":
  154. # for j in d[3:]:
  155. if i == '功能描述':
  156. continue
  157. else:
  158. gnmk_str.append(i)
  159. str_dict[wdys1.get(title)] = join_str
  160. # print(str_dict.get(wdys1.get(title)))
  161. else:
  162. if title == "功能模块":
  163. er_title.add(d[1])
  164. for i in d[3:]:
  165. if pd.notnull(i):
  166. join_str += str(i)
  167. if title == "功能模块":
  168. gnmk_str.append(i)
  169. str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
  170. # gnmk="".join(gnmk_str)
  171. # str_dict['gnmk']=gnmk
  172. gnmk = "".join(gnmk_str)
  173. str_dict['gnmk'] = gnmk
  174. # print(str_dict)
  175. # print(str_dict.get("xzwt")if str_dict.get("xzwt") else None)
  176. # print(str_dict.get('gnmk')if str_dict.get('gnmk')else None)
  177. mysql.sql_change_msg(
  178. """insert into user_history_data_total (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys,sbdw,ssdq,ysnd) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s","%s","%s","%s")"""
  179. % (escape_string(xmmc),
  180. escape_string(str_dict.get("xzwt")) if str_dict.get("xzwt") else None,
  181. escape_string(str_dict.get("xtjc")) if str_dict.get("xtjc") else None,
  182. escape_string(str_dict.get("xmmb")) if str_dict.get("xmmb") else None,
  183. escape_string(str_dict.get("yqjx")) if str_dict.get("yqjx") else None,
  184. escape_string(str_dict.get("jsxq")) if str_dict.get("jsxq") else None,
  185. escape_string(str_dict.get("sjxq")) if str_dict.get("sjxq") else None,
  186. escape_string(str_dict.get("aqxq")) if str_dict.get("aqxq") else None,
  187. escape_string(str_dict.get("ywly")) if str_dict.get("ywly") else None,
  188. escape_string(str_dict.get("hxyw")) if str_dict.get("hxyw") else None,
  189. escape_string(str_dict.get("ywxq")) if str_dict.get("ywxq") else None,
  190. escape_string(str_dict.get("ywxt")) if str_dict.get("ywxt") else None,
  191. escape_string(str_dict.get("jscj")) if str_dict.get("jscj") else None,
  192. escape_string(str_dict.get("yhfw")) if str_dict.get("yhfw") else None,
  193. escape_string(str_dict.get("mbqt")) if str_dict.get("mbqt") else None,
  194. escape_string(str_dict.get("jsnr")) if str_dict.get("jsnr") else None,
  195. escape_string(str_dict.get("gnmk")) if str_dict.get("gnmk") else None,
  196. escape_string(str_dict.get("sjgx")) if str_dict.get("sjgx") else None,
  197. escape_string(str_dict.get("znys")) if str_dict.get("znys") else None,
  198. escape_string(str_dict.get("sbdw")) if str_dict.get("sbdw") else None,
  199. escape_string(str_dict.get("ssdq")) if str_dict.get("ssdq") else None,
  200. escape_string(str_dict.get("ysnd")) if str_dict.get("ysnd") else None
  201. ))
  202. # project_gjc = {}
  203. # for w in wdys2.keys():
  204. # content_x = str_dict.get(w)
  205. # content_y = str_dict.get(w)
  206. # if content_x and content_y:
  207. # # 循环遍历每一个维度
  208. # similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
  209. # # 相似度 关键词
  210. # similarity, keywords_x, keywords_y = similarity.main()
  211. # project_gjc[w] = keywords_y
  212. # mysql.sql_change_msg(
  213. # """insert into user_history_keywords (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
  214. # % (xmmc, str(project_gjc.get("xzwt"))[dup_file_test:-dup_file_test] if project_gjc.get("xzwt") else None,
  215. # str(project_gjc.get("xtjc"))[dup_file_test:-dup_file_test] if project_gjc.get("xtjc") else None,
  216. # str(project_gjc.get("xmmb"))[dup_file_test:-dup_file_test] if project_gjc.get("xmmb") else None,
  217. # str(project_gjc.get("yqjx"))[dup_file_test:-dup_file_test] if project_gjc.get("yqjx") else None,
  218. # str(project_gjc.get("jsxq"))[dup_file_test:-dup_file_test] if project_gjc.get("jsxq") else None,
  219. # str(project_gjc.get("sjxq"))[dup_file_test:-dup_file_test] if project_gjc.get("sjxq") else None,
  220. # str(project_gjc.get("aqxq"))[dup_file_test:-dup_file_test] if project_gjc.get("aqxq") else None,
  221. # str(project_gjc.get("ywly"))[dup_file_test:-dup_file_test] if project_gjc.get("ywly") else None,
  222. # str(project_gjc.get("hxyw"))[dup_file_test:-dup_file_test] if project_gjc.get("hxyw") else None,
  223. # str(project_gjc.get("ywxq"))[dup_file_test:-dup_file_test] if project_gjc.get("ywxq") else None,
  224. # str(project_gjc.get("ywxt"))[dup_file_test:-dup_file_test] if project_gjc.get("ywxt") else None,
  225. # str(project_gjc.get("jscj"))[dup_file_test:-dup_file_test] if project_gjc.get("jscj") else None,
  226. # str(project_gjc.get("yhfw"))[dup_file_test:-dup_file_test] if project_gjc.get("yhfw") else None,
  227. # str(project_gjc.get("mbqt"))[dup_file_test:-dup_file_test] if project_gjc.get("mbqt") else None,
  228. # str(project_gjc.get("jsnr"))[dup_file_test:-dup_file_test] if project_gjc.get("jsnr") else None,
  229. # str(project_gjc.get("gnmk"))[dup_file_test:-dup_file_test] if project_gjc.get("gnmk") else None,
  230. # str(project_gjc.get("sjgx"))[dup_file_test:-dup_file_test] if project_gjc.get("sjgx") else None,
  231. # str(project_gjc.get("znys"))[dup_file_test:-dup_file_test] if project_gjc.get("znys") else None))
  232. gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title)
  233. if __name__ == "__main__":
  234. path = r"D:\dup_file_test"
  235. data_list = os.listdir(path)
  236. print(len(data_list))
  237. for file in data_list:
  238. # print(path+'\\'+file)
  239. data_list = [(0, path + '\\' + file, "")]
  240. project_check(data_list)
  241. print("已存入************************************* %s" % file)
  242. """
  243. 建设目标,业务功能
  244. gnmk_str = []
  245. for d in data:
  246. if pd.notnull(d[0]):
  247. title = d[0]
  248. if title == "功能模块":
  249. er_title.add(d[dup_file_test])
  250. join_str = ""
  251. for i in d[dup_file_test:]:
  252. if pd.notnull(i):
  253. join_str += i
  254. if title == "功能模块":
  255. gnmk_str.append(i)
  256. str_dict[wdys1.get(title)] = join_str
  257. else:
  258. if title == "功能模块":
  259. er_title.add(d[dup_file_test])
  260. for i in d[dup_file_test:]:
  261. if pd.notnull(i):
  262. join_str += i
  263. if title == "功能模块":
  264. gnmk_str.append(i)
  265. str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
  266. gnmk = "".join(gnmk_str)
  267. """