丽水查重代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

512 lines
25KB

  1. # coding=utf-8
  2. import sys
  3. import re
  4. import baidu
  5. import model_scope
  6. import mysql_pool
  7. from pymysql.converters import escape_string
  8. import cosin_similarity
  9. import pandas as pd
  10. import datetime
  11. import requests
  12. import glm_utils
  13. from threading import Thread
  14. wdys1 = {
  15. "项目名称": "xmmc",
  16. "现状问题": "xzwt",
  17. "系统基础": "xtjc",
  18. "项目目标": "xmmb",
  19. "预期绩效": "yqjx",
  20. "建设需求": "jsxq",
  21. "数据需求": "sjxq",
  22. "安全需求": "aqxq",
  23. "业务领域": "ywly",
  24. "核心业务": "hxyw",
  25. "业务需求": "ywxq",
  26. "业务协同": "ywxt",
  27. "建设层级": "jscj",
  28. "用户范围": "yhfw",
  29. "目标群体": "mbqt",
  30. "建设内容": "jsnr",
  31. "功能模块": "gnmk",
  32. "数据共享": "sjgx",
  33. "智能要素": "znys"
  34. }
  35. wdys2 = {
  36. "xmmc": "项目名称",
  37. "xzwt": "现状问题",
  38. "xtjc": "系统基础",
  39. "xmmb": "项目目标",
  40. "yqjx": "预期绩效",
  41. "jsxq": "建设需求",
  42. "sjxq": "数据需求",
  43. "aqxq": "安全需求",
  44. "ywly": "业务领域",
  45. "hxyw": "核心业务",
  46. "ywxq": "业务需求",
  47. "ywxt": "业务协同",
  48. "jscj": "建设层级",
  49. "yhfw": "用户范围",
  50. "mbqt": "目标群体",
  51. "jsnr": "建设内容",
  52. "gnmk": "功能模块",
  53. "sjgx": "数据共享",
  54. "znys": "智能要素"
  55. }
  56. gnmkys = {
  57. "gnmc": "功能名称",
  58. "gnms": "功能描述"
  59. }
  60. def getFlag():
  61. data_dict = {}
  62. df = pd.read_excel("0825.xlsx")
  63. data = df.values
  64. data = list(pd.Series(data[:, 1]).dropna())
  65. for d in data:
  66. try:
  67. wd = re.search("(.*?)(.*?%)", d).group(1).strip()
  68. wdc = wdys1.get(wd)
  69. if wdc:
  70. qz = re.search(".*?((.*?%))", d).group(1)
  71. data_dict[wdc] = qz
  72. except:
  73. pass
  74. return data_dict
  75. def gong_neng_mo_kuai(mysql, dl, data, er_title, str_dict_new):
  76. nlp = model_scope.Bert_nlp("corom")
  77. # 将excel文件中的所有第三维度内容进行拼接
  78. str_dict = {}
  79. for et in er_title:
  80. for d in data:
  81. if d[1] == et:
  82. if str_dict.get(et):
  83. str_dict[et] = str_dict.get(et) + d[3]
  84. else:
  85. str_dict[et] = d[3]
  86. for k, v in str_dict.items():
  87. mysql.sql_change_msg(
  88. """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, "%s", "%s", "%s", "%s", "模块")""" % (
  89. int(dl[0]), k, v, str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
  90. module_id_list = mysql.sql_select_many(
  91. """select project_module_id, module_name, module_content from idc_project_module where project_id=%d""" % dl[
  92. 0])
  93. data_list = []
  94. for mil in module_id_list:
  95. data_dict = {}
  96. data_dict["project_module_id"] = mil.get("project_module_id")
  97. data_dict["gnmc"] = mil.get("module_name")
  98. # data_dict["glm_desc"] = baidu.CallResult(mil.get("module_content"))
  99. data_dict["gnms"] = mil.get("module_content")
  100. # print(f'module_content = ({mil.get("module_content")}), glm_desc = ({data_dict["glm_desc"]})')
  101. data_list.append(data_dict)
  102. # print(data_list)
  103. for i in data_list:
  104. # where xmmc = '南浔区信息化项目全生命周期管理系统' where xmmc = '丽水数字教育(一期)项目'
  105. gnmk_copy1 = mysql.sql_select_many("""select * from user_history_module_data WHERE gnmc not in ('专项考评管理应用')""")
  106. if gnmk_copy1:
  107. desc_info_list = []
  108. for gc in gnmk_copy1:
  109. if gc.get("xmmc") != dl[2]:
  110. desc_info_list.append(gc.get("gnms"))
  111. similarity, s1, s2, idx = nlp.main(i.get("gnms"), desc_info_list)
  112. if idx == -1:
  113. continue
  114. mysql.sql_change_msg(
  115. """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, "%s", "%s", "%s", "%s", "%s")"""
  116. % (
  117. i.get("project_module_id"), escape_string(gnmk_copy1[idx].get("gnmc")), escape_string(gnmk_copy1[idx].get("xmmc")), "",
  118. str(datetime.datetime.now())[:-7],
  119. str(datetime.datetime.now())[:-7]))
  120. dup_module_id = mysql.cur.lastrowid
  121. check_module_info(mysql, gnmk_copy1[idx], dl, i, dup_module_id, similarity)
  122. def check_module_info(mysql, gc, dl, pro, dup_module_id, score):
  123. total_similarity1 = 0
  124. total_similarity2 = 0
  125. for j in ["gnmc", "gnms"]:
  126. # 循环遍历每一个模块名称
  127. content_x = gc.get(j)
  128. content_y = pro.get(j)
  129. if content_x and content_y:
  130. if j == "gnmc":
  131. # print("功能名称对比")
  132. similarity, check_desc = glm_utils.AutoDLResult(f"""请帮我分析以下两段重复语句重复的地方: \n第一段话是:'{content_y}', \n ----------------- \n 第二段话是:'{content_x}'""")
  133. # # 相似度相加
  134. if similarity is None:
  135. similarity = 0
  136. print(f"similarity is {similarity}")
  137. total_similarity1 += similarity/100
  138. mysql.sql_change_msg(
  139. """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time, check_desc) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s", "%s")"""
  140. % (dup_module_id, dl[2], escape_string(content_y), escape_string(content_x), similarity,
  141. "功能名称",
  142. str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7], escape_string(check_desc)))
  143. else:
  144. check_desc = glm_utils.AutoDLResultNoNum(f"""请帮我分析以下两段重复语句重复的地方: \n第一段话是:'{content_y}', \n ----------------- \n 第二段话是:'{content_x}'""")
  145. similarity = score
  146. # 相似度相加 gnms
  147. total_similarity2 += similarity
  148. module_content = pro.get("gnms")
  149. dup_module_content = gc.get("gnms")
  150. mysql.sql_change_msg(
  151. """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time, check_desc) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s", "%s")"""
  152. % (dup_module_id, dl[2], escape_string(module_content), escape_string(dup_module_content),
  153. similarity,
  154. "功能模块描述",
  155. str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7],
  156. escape_string(check_desc)))
  157. mysql.sql_change_msg("""update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
  158. total_similarity1 + total_similarity2, dup_module_id))
  159. def project_check(data_list):
  160. mysql = mysql_pool.ConnMysql()
  161. # mysql.sql_select_many("""select * from mkgjc""")
  162. # 读取历史数据
  163. xmnr_count = len(mysql.sql_select_many("""select * from user_history_data"""))
  164. gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data"""))
  165. nlp = model_scope.Bert_nlp("corom")
  166. # 遍历excel存储路径
  167. for dl in data_list:
  168. # path = "0825-丽水系统查重维度1.xlsx"
  169. # 读取路径下的excel
  170. print(dl,dl[1])
  171. df = pd.read_excel(dl[1])
  172. data = df.values
  173. # 将excel文件中的所有维度内容进行拼接
  174. join_str = ""
  175. str_dict = {}
  176. gnmk_str = []
  177. title = ""
  178. er_title = set()
  179. for d in data:
  180. # if pd.notnull(d[0]):
  181. # title = d[0]
  182. # if title == "功能模块":
  183. # er_title.add(d[1])
  184. # join_str = ""
  185. # for i in d[1:]:
  186. # if pd.notnull(i):
  187. # join_str += str(i)
  188. # str_dict[wdys1.get(title)] = join_str
  189. if pd.notnull(d[0]):
  190. title = d[0]
  191. if title == "功能模块":
  192. er_title.add(d[1])
  193. join_str = ""
  194. for i in d[1:]:
  195. if pd.notnull(i):
  196. join_str += str(i)
  197. if title == "功能模块":
  198. if i == '功能描述':
  199. continue
  200. else:
  201. gnmk_str.append(i)
  202. str_dict[wdys1.get(title)] = join_str
  203. else:
  204. if title == "功能模块":
  205. er_title.add(d[1])
  206. for i in d[1:]:
  207. if pd.notnull(i):
  208. join_str += str(i)
  209. str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
  210. # print(str_dict)
  211. gnmk = ",".join(gnmk_str)
  212. str_dict['gnmk'] = gnmk
  213. mysql.sql_change_msg(
  214. """insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
  215. % (dl[0], str_dict.get("xzwt") if str_dict.get("xzwt") else None,
  216. str_dict.get("xtjc") if str_dict.get("xtjc") else None,
  217. str_dict.get("xmmb") if str_dict.get("xmmb") else None,
  218. str_dict.get("yqjx") if str_dict.get("yqjx") else None,
  219. str_dict.get("jsxq") if str_dict.get("jsxq") else None,
  220. str_dict.get("sjxq") if str_dict.get("sjxq") else None,
  221. str_dict.get("aqxq") if str_dict.get("aqxq") else None,
  222. str_dict.get("ywly") if str_dict.get("ywly") else None,
  223. str_dict.get("hxyw") if str_dict.get("hxyw") else None,
  224. str_dict.get("ywxq") if str_dict.get("ywxq") else None,
  225. str_dict.get("ywxt") if str_dict.get("ywxt") else None,
  226. str_dict.get("jscj") if str_dict.get("jscj") else None,
  227. str_dict.get("yhfw") if str_dict.get("yhfw") else None,
  228. str_dict.get("mbqt") if str_dict.get("mbqt") else None,
  229. str_dict.get("jsnr") if str_dict.get("jsnr") else None,
  230. str_dict.get("gnmk") if str_dict.get("gnmk") else None,
  231. str_dict.get("sjgx") if str_dict.get("sjgx") else None,
  232. str_dict.get("znys") if str_dict.get("znys") else None))
  233. # 或取所有的xmnr_copy1 where xmmc = '南浔区信息化项目全生命周期管理系统' where xmmc = '丽水数字教育(一期)项目'
  234. xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data """)
  235. # 对比xmnr_copy1和xmnr维度是否都有
  236. if xmnr_copy1:
  237. # threads = [Thread(target=check_project_info, args=(mysql, dl, xc, str_dict)) for xc in xmnr_copy1]
  238. # for t in threads:
  239. # t.start()
  240. #
  241. # for t in threads:
  242. # t.join()
  243. # pro_ths = []
  244. # for xc in xmnr_copy1:
  245. # # check_project_info(mysql, dl, xc, str_dict)
  246. # p = Thread(target=check_project_info, args=(mysql, dl, xc, str_dict))
  247. # pro_ths.append(p)
  248. # p.start()
  249. # for p in pro_ths:
  250. # p.join()
  251. xmnr_copy1_new = []
  252. for xc in xmnr_copy1:
  253. if xc["xmmc"] == str_dict.get("xmmc"):
  254. continue
  255. check_project_info(mysql, dl, xc, str_dict, nlp)
  256. # 找出相识对最高的项目通过glm分析
  257. mysql.sql_change_msg(
  258. """update idc_project set dup_status=3, one_vote_veto_status=1, self_check_status=1, history_project_count=%d ,module_count=%d where project_id=%d""" % (
  259. xmnr_count, gnmk_count, dl[0]))
  260. gong_neng_mo_kuai(mysql, dl, data, er_title, str_dict)
  261. def check_project_info(mysql, dl, xc, str_dict, nlp):
  262. total_keywords = {}
  263. total_similarity = 0
  264. dup_count = 0
  265. # 保存相加后的相似度到idc_project_check
  266. mysql.sql_change_msg(
  267. """insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
  268. % (dl[0], escape_string(xc.get("xmmc")), escape_string(dl[1]), "", "", "需求相似、业务相似", "历史项目", "",
  269. str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
  270. dup_id = mysql.cur.lastrowid
  271. for x in list(xc.keys())[1:]:
  272. content_x = xc.get(x)
  273. content_y = str_dict.get(x)
  274. if content_x and content_y:
  275. if x == 'gnmk':
  276. continue
  277. elif x == 'jsnr':
  278. continue
  279. else:
  280. dup_count += 1
  281. if ((xc['gnmk'] == 'None' or xc['gnmk'] is None or str.strip(xc['gnmk']) == '') and (str_dict['gnmk'] is None or str.strip(str_dict['gnmk']) == '')) and (
  282. not xc['jsnr'] is None and xc['jsnr'] != 'None' and not str_dict['jsnr'] is None and len(str.strip(str_dict['jsnr'])) > 0):
  283. for x in list(xc.keys())[1:]:
  284. content_x = xc.get(x)
  285. content_y = str_dict.get(x)
  286. if content_x and content_y:
  287. if x == 'gnmk':
  288. # 循环遍历每一个维度
  289. contents_y = []
  290. contents_y.append(content_y)
  291. similarity, content1, content2, idx = nlp.main(content_x, contents_y)
  292. similarity = similarity * 0
  293. # print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
  294. # 相似度相加
  295. total_similarity += similarity
  296. function_content = content_y
  297. dup_function_content = content_x
  298. # 保存每个维度对应的相似度到idc_project_check_detail
  299. mysql.sql_change_msg(
  300. """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
  301. % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
  302. escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
  303. str(datetime.datetime.now())[:-7]))
  304. elif x == 'jsnr':
  305. # 循环遍历每一个维度
  306. contents_y = []
  307. contents_y.append(content_y)
  308. similarity, content1, content2, idx = nlp.main(content_x, contents_y)
  309. similarity = similarity * 40
  310. # print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
  311. # 相似度相加
  312. total_similarity += similarity
  313. function_content = content_y
  314. dup_function_content = content_x
  315. # 保存每个维度对应的相似度到idc_project_check_detail
  316. mysql.sql_change_msg(
  317. """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
  318. % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
  319. escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
  320. str(datetime.datetime.now())[:-7]))
  321. else:
  322. # 循环遍历每一个维度
  323. contents_y = []
  324. contents_y.append(content_y)
  325. similarity, content1, content2, idx = nlp.main(content_x, contents_y)
  326. similarity = similarity * (60 / dup_count)
  327. # print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
  328. # 相似度相加
  329. total_similarity += similarity
  330. function_content = content_y
  331. dup_function_content = content_x
  332. # 保存每个维度对应的相似度到idc_project_check_detail
  333. mysql.sql_change_msg(
  334. """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
  335. % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
  336. escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
  337. str(datetime.datetime.now())[:-7]))
  338. elif ((xc['jsnr'] == 'None' or xc['jsnr'] is None or str.strip(xc['jsnr']) == '') and (str_dict['jsnr'] is None or str.strip(str_dict['jsnr']) == '')) and (
  339. not xc['gnmk'] is None and xc['gnmk'] != 'None' and not str_dict['gnmk'] is None and len(str.strip(str_dict['gnmk'])) > 0):
  340. for x in list(xc.keys())[1:]:
  341. content_x = xc.get(x)
  342. content_y = str_dict.get(x)
  343. if content_x and content_y:
  344. if x == 'gnmk':
  345. # 循环遍历每一个维度
  346. contents_y = []
  347. contents_y.append(content_y)
  348. similarity, content1, content2, idx = nlp.main(content_x, contents_y)
  349. similarity = similarity * 50
  350. # print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
  351. # 相似度相加
  352. total_similarity += similarity
  353. function_content = content_y
  354. dup_function_content = content_x
  355. # 保存每个维度对应的相似度到idc_project_check_detail
  356. mysql.sql_change_msg(
  357. """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
  358. % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
  359. escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
  360. str(datetime.datetime.now())[:-7]))
  361. elif x == 'jsnr':
  362. # 循环遍历每一个维度
  363. contents_y = []
  364. contents_y.append(content_y)
  365. similarity, content1, content2, idx = nlp.main(content_x, contents_y)
  366. similarity = similarity * 0
  367. # print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
  368. # 相似度相加
  369. total_similarity += similarity
  370. function_content = content_y
  371. dup_function_content = content_x
  372. # 保存每个维度对应的相似度到idc_project_check_detail
  373. mysql.sql_change_msg(
  374. """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
  375. % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
  376. escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
  377. str(datetime.datetime.now())[:-7]))
  378. else:
  379. # 循环遍历每一个维度
  380. contents_y = []
  381. contents_y.append(content_y)
  382. similarity, content1, content2, idx = nlp.main(content_x, contents_y)
  383. similarity = similarity * (50 / dup_count)
  384. # print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
  385. # 相似度相加
  386. total_similarity += similarity
  387. function_content = content_y
  388. dup_function_content = content_x
  389. # 保存每个维度对应的相似度到idc_project_check_detail
  390. mysql.sql_change_msg(
  391. """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
  392. % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
  393. escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
  394. str(datetime.datetime.now())[:-7]))
  395. else:
  396. for x in list(xc.keys())[1:]:
  397. content_x = xc.get(x)
  398. content_y = str_dict.get(x)
  399. if content_x and content_y:
  400. if x == 'gnmk':
  401. # 循环遍历每一个维度
  402. contents_y = []
  403. contents_y.append(content_y)
  404. similarity, content1, content2, idx = nlp.main(content_x, contents_y)
  405. similarity = similarity * 50
  406. # 相似度相加
  407. total_similarity += similarity
  408. function_content = content_y
  409. dup_function_content = content_x
  410. # 保存每个维度对应的相似度到idc_project_check_detail
  411. mysql.sql_change_msg(
  412. """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
  413. % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
  414. escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
  415. str(datetime.datetime.now())[:-7]))
  416. elif x == 'jsnr':
  417. # 循环遍历每一个维度
  418. contents_y = []
  419. contents_y.append(content_y)
  420. similarity, content1, content2, idx = nlp.main(content_x, contents_y)
  421. similarity = similarity * 40
  422. # 相似度相加
  423. total_similarity += similarity
  424. function_content = content_y
  425. dup_function_content = content_x
  426. # 保存每个维度对应的相似度到idc_project_check_detail
  427. mysql.sql_change_msg(
  428. """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
  429. % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
  430. escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
  431. str(datetime.datetime.now())[:-7]))
  432. else:
  433. # 循环遍历每一个维度
  434. contents_y = []
  435. contents_y.append(content_y)
  436. print(f"123:{content_x}")
  437. print(f"234:{contents_y}")
  438. similarity, content1, content2, idx = nlp.main(content_x, contents_y)
  439. similarity = similarity * (10 / dup_count)
  440. # print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
  441. # 相似度相加
  442. total_similarity += similarity
  443. function_content = content_y
  444. dup_function_content = content_x
  445. # 保存每个维度对应的相似度到idc_project_check_detail
  446. mysql.sql_change_msg(
  447. """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
  448. % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
  449. escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
  450. str(datetime.datetime.now())[:-7]))
  451. print("insert 成功")
  452. mysql.sql_change_msg(
  453. """update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id))
  454. if __name__ == "__main__":
  455. all_path = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 599).json()
  456. # print(all_path)
  457. # dict1 = {k:v for k, v in sorted(dict.items(), key= lambda item : item[1])}
  458. # print(dict1)
  459. data_list = []
  460. for ap in all_path.get("data"):
  461. # if os.path.exists(ap.get("file_path")):
  462. data_list.append((ap.get("project_id"), ap.get("file_path"), ap.get("project_name")))
  463. print(data_list)
  464. # data_list = [(11, r"C:\Users\HUAWEI\PycharmProjects\nlp\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
  465. data_list = [(11, r"D:\ningda\dup_check2\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
  466. project_check(data_list)