|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550 |
- # coding=utf-8
- import sys
- import re
-
- import baidu
- import model_scope
- import mysql_pool
- from pymysql.converters import escape_string
- import cosin_similarity
- import pandas as pd
- import datetime
- import requests
- import glm_utils
- from threading import Thread
-
-
# Dimension display name (as written in the Excel sheet) -> database column name.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys",
}

# Database column name -> dimension display name.
# Derived from wdys1 so the two mappings can never drift apart.
wdys2 = {column: label for label, column in wdys1.items()}

# Function-module column name -> display name.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述",
}
-
-
def getFlag(path="0825.xlsx"):
    """Read per-dimension weights from the second column of an Excel sheet.

    Each non-empty cell of the second column is expected to look like
    ``<dimension name>(<weight>%)``; both ASCII and full-width parentheses
    are accepted.  NOTE(review): the cell layout is inferred from the
    original regular expressions, whose literal parentheses appear to have
    been lost -- confirm against a real workbook.

    Args:
        path: Workbook to read.  Defaults to the previously hard-coded
            ``"0825.xlsx"``.

    Returns:
        dict mapping a database column name (a value of ``wdys1``) to the
        weight text including the percent sign, e.g. ``{"xmmc": "10%"}``.
        Cells that are not text, do not match the layout, or name an
        unknown dimension are skipped.
    """
    # The original pattern "(.*?)(.*?%)" used the parentheses as groups, so
    # group 1 was always the empty string and no weight was ever returned.
    cell_pattern = re.compile(r"^\s*(.*?)\s*[(（]\s*([^()（）]*?%)\s*[)）]")

    data_dict = {}
    df = pd.read_excel(path)
    cells = pd.Series(df.values[:, 1]).dropna()
    for cell in cells:
        # Numeric cells cannot carry a "name(weight%)" label.
        if not isinstance(cell, str):
            continue
        match = cell_pattern.search(cell)
        if match is None:
            continue
        wdc = wdys1.get(match.group(1))
        if wdc:
            data_dict[wdc] = match.group(2)
    return data_dict
-
-
def gong_neng_mo_kuai(mysql, dl, data, er_title, str_dict_new):
    """Store the project's function modules and compare each with history.

    Steps:
      1. For every module title in ``er_title``, concatenate column 3 of all
         rows in ``data`` whose column 1 equals that title.
      2. Insert one ``idc_project_module`` row per module.
      3. Re-read all modules of the project and, for each one, ask
         ``glm_utils.qwenResult`` to compare its description with every row
         of ``user_history_module_data``; store the verdict in
         ``idc_project_module_check`` and the per-field details through
         ``check_module_info``.

    Args:
        mysql: Database helper exposing ``sql_change_msg``,
            ``sql_select_many`` and ``cur.lastrowid``.
        dl: Project tuple; ``dl[0]`` is the project id.
        data: Rows of the project's Excel sheet (indexable per row).
        er_title: Iterable of module titles found in the sheet.
        str_dict_new: Unused; kept for interface compatibility.
    """
    project_id = int(dl[0])

    # 1. Concatenate the description cells belonging to each module title.
    module_text = {}
    for et in er_title:
        for d in data:
            if d[1] != et:
                continue
            module_text.setdefault(et, "")
            # An empty (NaN) description cell must not break the string concat.
            if pd.notnull(d[3]):
                module_text[et] += str(d[3])

    # 2. Persist the modules.  Values come from a user-supplied sheet, so
    #    they are escaped before being placed into the statement.
    for module_name, module_content in module_text.items():
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        mysql.sql_change_msg(
            """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, "%s", "%s", "%s", "%s", "模块")""" % (
                project_id, escape_string(str(module_name)), escape_string(module_content), now, now))

    module_rows = mysql.sql_select_many(
        """select project_module_id, module_name, module_content from idc_project_module where project_id=%d"""
        % project_id)
    modules = [
        {
            "project_module_id": row.get("project_module_id"),
            "gnmc": row.get("module_name"),
            "gnms": row.get("module_content"),
        }
        for row in module_rows
    ]

    # 3. The history table is not modified here, so it is read only once.
    history_modules = mysql.sql_select_many("""select * from user_history_module_data """)
    if not history_modules:
        return

    for module in modules:
        for gc in history_modules:
            desc = glm_utils.qwenResult(module.get("gnms"), gc.get("gnms"))
            similarity_result, similarity = similarity_result_check(desc)
            now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            mysql.sql_change_msg(
                """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time, similarity_result) value(%d, "%s", "%s", "%s", "%s", "%s", "%s")"""
                % (
                    module.get("project_module_id"),
                    escape_string(gc.get("gnmc") or ""),
                    escape_string(gc.get("xmmc") or ""),
                    "",
                    now,
                    now,
                    escape_string(similarity_result)))
            # Id of the check row just inserted; detail rows hang off it.
            dup_module_id = mysql.cur.lastrowid
            check_module_info(mysql, gc, dl, module, dup_module_id, similarity)
-
-
def _insert_module_check_detail(mysql, dup_module_id, project_name, content,
                                dup_content, similarity, dimension, check_desc):
    """Insert one escaped row into idc_project_module_check_detail."""
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    mysql.sql_change_msg(
        """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time, check_desc) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s", "%s")"""
        % (dup_module_id, escape_string(str(project_name)), escape_string(content),
           escape_string(dup_content), similarity, dimension, now, now,
           escape_string(check_desc or "")))


def check_module_info(mysql, gc, dl, pro, dup_module_id, score):
    """Compare one project module with one historical module, field by field.

    For the module name (``gnmc``) the similarity comes from
    ``glm_utils.AutoDLResult``; for the module description (``gnms``) the
    already computed ``score`` is used and only an explanation is requested
    from ``glm_utils.AutoDLResultNoNum``.  One detail row is written per
    compared field, then the summed similarity is stored on the parent
    ``idc_project_module_check`` row.

    Args:
        mysql: Database helper exposing ``sql_change_msg``.
        gc: Historical module row (mapping with ``gnmc`` / ``gnms``).
        dl: Project tuple; ``dl[2]`` is written as the project name.
        pro: Current module (mapping with ``gnmc`` / ``gnms``).
        dup_module_id: Id of the parent idc_project_module_check row.
        score: Similarity score of the description comparison.
    """
    name_similarity = 0
    description_similarity = 0

    for j in ("gnmc", "gnms"):
        content_x = gc.get(j)
        content_y = pro.get(j)
        # A field missing on either side cannot be compared.
        if not (content_x and content_y):
            continue

        prompt = f"""请帮我分析以下两段重复语句重复的地方: \n第一段话是:'{content_y}', \n ----------------- \n 第二段话是:'{content_x}'"""
        if j == "gnmc":
            similarity, check_desc = glm_utils.AutoDLResult(prompt)
            if similarity is None:
                similarity = 0
            print(f"similarity is {similarity}")
            # NOTE(review): the name similarity is scaled by 1/100 while the
            # description score below is added unscaled -- confirm intended.
            name_similarity += similarity / 100
            dimension = "功能名称"
        else:
            check_desc = glm_utils.AutoDLResultNoNum(prompt)
            similarity = score
            description_similarity += similarity
            dimension = "功能模块描述"

        _insert_module_check_detail(
            mysql, dup_module_id, dl[2], content_y, content_x,
            similarity, dimension, check_desc)

    mysql.sql_change_msg("""update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
        name_similarity + description_similarity, dup_module_id))
-
-
def _collect_dimensions(data):
    """Group the sheet rows into per-dimension text.

    A row whose first cell is non-empty starts a new dimension; following
    rows with an empty first cell continue it.  Returns ``(str_dict,
    er_title)`` where ``str_dict`` maps a database column name to the
    concatenated cell text and ``er_title`` is the set of second-column
    values seen inside the "功能模块" section.
    """
    str_dict = {}
    gnmk_parts = []
    er_title = set()
    title = ""
    for d in data:
        starts_dimension = pd.notnull(d[0])
        if starts_dimension:
            title = d[0]
        in_module_section = title == "功能模块"
        if in_module_section and pd.notnull(d[1]):
            er_title.add(d[1])

        # Text of THIS row only.  The previous implementation kept appending
        # to one running buffer, so continuation rows re-added earlier text.
        row_text = ""
        for cell in d[1:]:
            if pd.isnull(cell):
                continue
            row_text += str(cell)
            # Only the heading row of the module section feeds the gnmk
            # summary; the "功能描述" column header is left out.
            if starts_dimension and in_module_section and cell != "功能描述":
                gnmk_parts.append(str(cell))

        key = wdys1.get(title)
        if key is None:
            # Unknown heading (or rows before the first heading).
            continue
        if starts_dimension:
            str_dict[key] = row_text
        else:
            str_dict[key] = (str_dict.get(key) or "") + row_text

    str_dict["gnmk"] = ",".join(gnmk_parts)
    return str_dict, er_title


def project_check(data_list, history_project_name="富阳未来社区(乡村)一体化数智平台"):
    """Run the duplicate check for every project in ``data_list``.

    For each project the Excel sheet is parsed into dimensions, stored in
    ``user_data``, compared with the selected historical projects through
    ``check_project_info``, the ``idc_project`` status row is updated, and
    finally the function modules are checked by ``gong_neng_mo_kuai``.

    Args:
        data_list: Iterable of ``(project_id, file_path, project_name)``.
        history_project_name: Name (``xmmc``) of the historical project to
            compare against.  The default keeps the previously hard-coded
            filter; pass ``None`` to compare against every historical
            project.
    """
    mysql = mysql_pool.ConnMysql()
    # Sizes of the history tables, reported on the project row below.
    xmnr_count = len(mysql.sql_select_many("""select * from user_history_data"""))
    gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data"""))

    nlp = model_scope.Bert_nlp("corom")

    # Column order of the user_data insert after the leading xmmc column.
    columns = ("xzwt", "xtjc", "xmmb", "yqjx", "jsxq", "sjxq", "aqxq", "ywly", "hxyw",
               "ywxq", "ywxt", "jscj", "yhfw", "mbqt", "jsnr", "gnmk", "sjgx", "znys")

    for dl in data_list:
        print(dl, dl[1])
        df = pd.read_excel(dl[1])
        data = df.values
        str_dict, er_title = _collect_dimensions(data)

        # NOTE(review): a missing dimension is still written as the literal
        # text "None" (other code compares against 'None'), and the xmmc
        # column receives dl[0], the project id -- confirm both are intended.
        values = [dl[0]]
        for column in columns:
            text = str_dict.get(column)
            values.append(escape_string(text) if text else None)
        mysql.sql_change_msg(
            """insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % tuple(values))

        if history_project_name is None:
            xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data""")
        else:
            xmnr_copy1 = mysql.sql_select_many(
                """select * from user_history_data where xmmc = '%s' """
                % escape_string(history_project_name))

        if xmnr_copy1:
            for xc in xmnr_copy1:
                # Never compare a project with its own historical record.
                if xc["xmmc"] == str_dict.get("xmmc"):
                    continue
                check_project_info(mysql, dl, xc, str_dict, nlp)

        mysql.sql_change_msg(
            """update idc_project set dup_status=3, one_vote_veto_status=1, self_check_status=1, history_project_count=%d ,module_count=%d where project_id=%d""" % (
                xmnr_count, gnmk_count, int(dl[0])))
        gong_neng_mo_kuai(mysql, dl, data, er_title, str_dict)
-
-
def check_project_info(mysql, dl, xc, str_dict, nlp):
    """Compare the current project with one historical project per dimension.

    A parent row is inserted into ``idc_project_check``; then every column
    of ``xc`` (except the first) that has content on both sides is compared
    with ``glm_utils.qwenResult``.  The level returned by
    ``similarity_result_check`` is multiplied by a weight, written to
    ``idc_project_check_detail`` and summed into the parent row.

    Weights (gnmk, jsnr, total shared by all other dimensions):
      * gnmk empty on both sides, jsnr present on both: 0 / 40 / 60
      * jsnr empty on both sides, gnmk present on both: 50 / 0 / 50
      * otherwise:                                      50 / 40 / 10

    Args:
        mysql: Database helper exposing ``sql_change_msg`` and
            ``cur.lastrowid``.
        dl: Project tuple; ``dl[0]`` is the project id, ``dl[1]`` the file
            path.
        xc: Historical project row (mapping of column name to text).
        str_dict: Current project's dimensions keyed by column name.
        nlp: Unused; kept for interface compatibility.
    """
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    mysql.sql_change_msg(
        """insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
        % (dl[0], escape_string(xc.get("xmmc")), escape_string(dl[1]), "", "", "需求相似、业务相似", "历史项目", "",
           now, now))
    dup_id = mysql.cur.lastrowid

    # The first column of the history row is skipped, as before.
    columns = list(xc.keys())[1:]

    # Number of ordinary dimensions comparable on both sides; their shared
    # weight is split evenly between them.
    dup_count = sum(
        1 for x in columns
        if x not in ("gnmk", "jsnr") and xc.get(x) and str_dict.get(x))

    # .get() instead of [] so a sheet without a 建设内容 / 功能模块 section
    # no longer raises KeyError.
    hist_gnmk, hist_jsnr = xc.get("gnmk"), xc.get("jsnr")
    new_gnmk, new_jsnr = str_dict.get("gnmk"), str_dict.get("jsnr")

    def hist_empty(value):
        # History rows may carry the literal text "None" for a missing value.
        return value is None or value == "None" or str(value).strip() == ""

    def hist_present(value):
        return value is not None and value != "None"

    def new_empty(value):
        return value is None or str(value).strip() == ""

    def new_present(value):
        return value is not None and len(str(value).strip()) > 0

    if (hist_empty(hist_gnmk) and new_empty(new_gnmk)
            and hist_present(hist_jsnr) and new_present(new_jsnr)):
        gnmk_weight, jsnr_weight, other_total = 0, 40, 60
    elif (hist_empty(hist_jsnr) and new_empty(new_jsnr)
            and hist_present(hist_gnmk) and new_present(new_gnmk)):
        gnmk_weight, jsnr_weight, other_total = 50, 0, 50
    else:
        gnmk_weight, jsnr_weight, other_total = 50, 40, 10

    total_similarity = 0
    for x in columns:
        content_x = xc.get(x)
        content_y = str_dict.get(x)
        if not (content_x and content_y):
            continue

        if x == "gnmk":
            weight = gnmk_weight
        elif x == "jsnr":
            weight = jsnr_weight
        else:
            # dup_count >= 1 here: this very dimension was counted above.
            weight = other_total / dup_count

        desc = glm_utils.qwenResult(content_y, content_x)
        similarity_result, count = similarity_result_check(desc)
        similarity = count * weight
        total_similarity += similarity

        # A detail row is written even for a zero weight so the textual
        # verdict is still recorded.
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        mysql.sql_change_msg(
            """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time, similarity_result) value (%d, "%s", %f, "%s", "%s", "%s", "%s", "%s")"""
            % (dup_id, wdys2.get(x), similarity, escape_string(str(content_y)),
               escape_string(str(content_x)), now, now, similarity_result))

    mysql.sql_change_msg(
        """update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id))
-
-
# Defined before the __main__ guard: the guard runs project_check(), which
# reaches this function, so it must already exist when the script is executed.
def similarity_result_check(desc):
    """Translate the model's answer into a similarity label and score.

    The character at index 6 of ``desc`` is read as the level
    (高 / 中 / 低).  NOTE(review): the fixed position presumably matches a
    fixed answer prefix produced by ``glm_utils.qwenResult`` -- confirm.

    Args:
        desc: Answer text; ``None`` or non-text input is tolerated.

    Returns:
        ``(label, score)``: ``("非常相似", 90)``, ``("比较相似", 60)`` or
        ``("相似度低", 30)``; ``("", 0)`` when no level is found.
    """
    if not isinstance(desc, str):
        return "", 0
    levels = {
        "高": ("非常相似", 90),
        "中": ("比较相似", 60),
        "低": ("相似度低", 30),
    }
    # Slicing is safe on short strings; a 7-character answer that ends with
    # the level is now recognised (the old ``len(desc) > 7`` guard missed it).
    return levels.get(desc[6:7], ("", 0))


if __name__ == "__main__":
    all_path = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 599).json()

    data_list = [
        (ap.get("project_id"), ap.get("file_path"), ap.get("project_name"))
        for ap in (all_path.get("data") or [])
    ]
    print(data_list)
    # NOTE(review): the list fetched above is replaced by a fixed local
    # sample file; remove this override to process the fetched projects.
    data_list = [(11, r"D:\ningda\dup_check2\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
    project_check(data_list)
|