@@ -0,0 +1,84 @@ | |||
# coding=utf-8 | |||
import re | |||
import html | |||
import jieba | |||
import jieba.analyse | |||
from sklearn.metrics.pairwise import cosine_similarity | |||
class CosineSimilarity(object):
    """Cosine similarity of two texts based on their jieba keyword counts.

    Both inputs may contain HTML; markup is stripped and entities are
    decoded before the text is segmented.
    """

    # Whole <style>/<script> elements (with or without attributes) are
    # removed first so their contents never reach the tokenizer; any other
    # tag is removed on its own. Compiled once instead of on every call.
    _MARKUP_RE = re.compile(
        r'<(style|script)\b[^>]*>.*?</\1\s*>|<[^>]+>', re.S | re.I)

    def __init__(self, content_x1, content_y2):
        """Store the two raw texts to compare."""
        self.s1 = content_x1
        self.s2 = content_y2

    @staticmethod
    def _strip_markup(content):
        """Replace HTML tags with spaces and decode HTML entities."""
        return html.unescape(CosineSimilarity._MARKUP_RE.sub(' ', content))

    @staticmethod
    def extract_keyword(content):
        """Segment *content* and extract its keywords.

        Returns:
            A ``(seg, keywords)`` tuple: ``seg`` is the list of non-empty
            tokens from jieba's full-mode cut, ``keywords`` the (up to 200)
            tags jieba extracts from those tokens, restricted to the
            ``n``/``nr``/``ns`` part-of-speech classes.
        """
        content = CosineSimilarity._strip_markup(content)
        seg = [token for token in jieba.cut(content, cut_all=True) if token != '']
        keywords = jieba.analyse.extract_tags(
            "|".join(seg), topK=200, withWeight=False,
            allowPOS=('n', 'nr', 'ns'))
        return seg, keywords

    @staticmethod
    def one_hot(word_dict, keywords):
        """Return the count vector of *keywords* over the *word_dict* vocabulary.

        Args:
            word_dict: mapping of word -> vector index.
            keywords: iterable of words; every word must be a key of
                *word_dict* (a missing word raises ``KeyError``).
        """
        cut_code = [0] * len(word_dict)
        for word in keywords:
            cut_code[word_dict[word]] += 1
        return cut_code

    def main(self):
        """Compare the two texts.

        Returns:
            ``(similarity, keywords1, keywords2)`` where ``similarity`` is
            the cosine similarity of the two keyword count vectors and is
            ``0.0`` when it cannot be computed.
        """
        seg1, keywords1 = self.extract_keyword(self.s1)
        seg2, keywords2 = self.extract_keyword(self.s2)

        # Shared vocabulary: every keyword of either text gets one index.
        vocabulary = set(keywords1) | set(keywords2)
        word_dict = {word: index for index, word in enumerate(vocabulary)}
        if not word_dict:
            # No keywords at all: there is nothing to build vectors from.
            return 0.0, keywords1, keywords2

        sample = [
            self.one_hot(word_dict, keywords1),
            self.one_hot(word_dict, keywords2),
        ]
        try:
            sim = cosine_similarity(sample)
        except ValueError as e:
            # Degenerate input rejected by scikit-learn; report no similarity.
            print(e)
            return 0.0, keywords1, keywords2
        return sim[1][0], keywords1, keywords2
# 测试 | |||
# Manual smoke test: compare two local text files.
if __name__ == '__main__':
    with open(r'D:\pythonDM\Ndkj\live111\result\1.txt', encoding='UTF-8') as x, \
            open(r'D:\pythonDM\Ndkj\live111\result\2.txt', encoding='UTF-8') as y:
        content_x = x.read()
        content_y = y.read()
    # main() returns (score, keywords_x, keywords_y); only the score can be
    # formatted as a percentage.
    result = CosineSimilarity(content_x, content_y).main()
    print(result)
    score = result[0]
    print('相似度: %.2f%%' % (score * 100))
@@ -0,0 +1,42 @@ | |||
# coding=utf-8 | |||
from flask import Flask, redirect, url_for, request | |||
import sys | |||
from flask import jsonify | |||
import mysql_pool | |||
import main1 | |||
import cosin_similarity | |||
# import xm | |||
# from xm import xsd | |||
# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)
@app.route('/check/duplicates/<projectId>')
def success(projectId):
    """Run the duplicate check for one project, or for every project.

    Args:
        projectId: URL segment holding the project id; ``0`` selects all
            rows of ``idc_project``.

    Returns:
        A JSON response ``{"code": 0, "data": <selected rows>}``.

    Raises:
        ValueError: if *projectId* is not an integer.
    """
    # Parse before touching the database so a bad id cannot leak a
    # connection, and so only a real integer is ever placed in the SQL.
    project_id = int(projectId)

    mysql = mysql_pool.ConnMysql()
    try:
        if project_id == 0:
            data = mysql.sql_select_many("""select * from idc_project""")
        else:
            data = mysql.sql_select_many(
                """select * from idc_project where project_id=%d""" % project_id)
    finally:
        # Always hand the connection back, even when the query fails.
        mysql.release()
    print(data)

    # (project_id, file_path, project_name) triples consumed by project_check.
    data_list = [
        (ap.get("project_id"), ap.get("file_path"), ap.get("project_name"))
        for ap in (data or ())
    ]
    main1.project_check(data_list)
    return jsonify({"code": 0, "data": data})
# The handler reads rows from idc_project (for example project_id=11) and
# processes the workbook stored at each row's file_path.
if __name__ == '__main__':
    # Alternative kept from the original: app.run(host="0.0.0.0", port=19099)
    app.run(port=19099)
@@ -0,0 +1,283 @@ | |||
# coding=utf-8 | |||
import re | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import cosin_similarity | |||
import pandas as pd | |||
import datetime | |||
import requests | |||
import os | |||
# Dimension title as written in the workbook -> short column code.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys",
    "申报单位": "sbdw",
    "所属地区": "ssdq",
    "预算年度": "ysnd",
}

# Reverse lookup (column code -> dimension title). Derived from wdys1 so the
# two tables cannot drift apart; key order matches wdys1.
wdys2 = {code: title for title, code in wdys1.items()}

# Column code -> title for the two function-module fields.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述",
}
def getFlag():
    """Read the dimension weights from the weighting workbook.

    The second column of ``0825-丽水系统查重维度.xlsx`` is scanned for cells
    that name a dimension followed by a parenthesised percentage.
    NOTE(review): assumes cells look like ``<dimension title>(<n>%)`` with
    either ASCII or full-width parentheses -- confirm against the workbook.

    Returns:
        dict mapping column code (see ``wdys1``) -> weight text such as
        ``"10%"``. Cells that do not match, or whose title is not a key of
        ``wdys1``, are ignored.
    """
    # Group 1: text before the opening parenthesis; group 2: the percentage.
    weight_pattern = re.compile(r"(.*?)[(（](.*?%)[)）]")

    df = pd.read_excel("0825-丽水系统查重维度.xlsx")
    cells = pd.Series(df.values[:, 1]).dropna()

    data_dict = {}
    for cell in cells:
        match = weight_pattern.search(str(cell))
        if match is None:
            continue
        wdc = wdys1.get(match.group(1).strip())
        if wdc:
            data_dict[wdc] = match.group(2)
    return data_dict
def gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title):
    """Store one row per function module in ``user_history_module_data_total``.

    Args:
        xmmc: project name written to every inserted row.
        mysql: connection wrapper exposing ``sql_change_msg(sql)``.
        dl: the data_list entry being processed (not used here; kept for
            signature compatibility).
        data: sheet rows; column 1 holds the module name and column 3 the
            description fragment.
        er_title: set of module names to collect.
    """
    # Concatenate the description fragments of each module in sheet order.
    # Rows whose module name is empty or not requested are skipped, and an
    # empty description cell contributes nothing.
    str_dict = {}
    for d in data:
        module_name = d[1]
        if pd.isnull(module_name) or module_name not in er_title:
            continue
        fragment = "" if pd.isnull(d[3]) else str(d[3])
        str_dict[module_name] = str_dict.get(module_name, "") + fragment

    for k, v in str_dict.items():
        mysql.sql_change_msg(
            """insert into user_history_module_data_total(xmmc,gnmc,gnms) value("%s", "%s", "%s")""" % (
                escape_string(xmmc), escape_string(str(k)), escape_string(v)))
# Text columns of user_history_data_total that follow xmmc, in the order the
# insert statement lists them.
_HISTORY_TEXT_COLUMNS = (
    "xzwt", "xtjc", "xmmb", "yqjx", "jsxq", "sjxq", "aqxq", "ywly", "hxyw",
    "ywxq", "ywxt", "jscj", "yhfw", "mbqt", "jsnr", "gnmk", "sjgx", "znys",
    "sbdw", "ssdq", "ysnd",
)


def _collect_dimensions(data):
    """Fold the sheet rows into ``({column code: text}, {module names})``.

    A row with a value in column 0 starts a new dimension and contributes
    columns 1 onward; a row without one continues the current dimension and
    contributes columns 3 onward.
    """
    str_dict = {}
    er_title = set()
    gnmk_parts = []
    title = ""
    join_str = ""
    for d in data:
        starts_dimension = pd.notnull(d[0])
        if starts_dimension:
            title = d[0]
            join_str = ""
        is_module = title == "功能模块"
        if is_module and pd.notnull(d[1]):
            er_title.add(d[1])

        for cell in (d[1:] if starts_dimension else d[3:]):
            if pd.isnull(cell):
                continue
            join_str += str(cell)
            # The '功能描述' header cell of the module section is not content.
            if is_module and not (starts_dimension and cell == '功能描述'):
                gnmk_parts.append(str(cell))

        key = wdys1.get(title)
        if key is not None:
            # join_str already holds everything gathered for this dimension,
            # so it replaces the stored text rather than being appended to it
            # (appending duplicated the earlier rows).
            str_dict[key] = join_str

    # The module dimension is rebuilt from its content cells only.
    str_dict['gnmk'] = "".join(gnmk_parts)
    return str_dict, er_title


def project_check(data_list):
    """Import project workbooks into the history tables.

    For each entry the workbook at ``entry[1]`` is read, its dimensions are
    written as one row of ``user_history_data_total`` and its function
    modules are stored through ``gong_neng_mo_kuai``.

    Args:
        data_list: iterable of ``(project_id, file_path, project_name)``.
    """
    mysql = mysql_pool.ConnMysql()
    try:
        for dl in data_list:
            print(dl)
            df = pd.read_excel(dl[1])

            # The second column header carries the project name; drop the
            # report-type wording from it.
            xmmc = str(df.keys()[1])
            for wording in ('可研报告', '可研性报告', '可行性研究报告'):
                xmmc = xmmc.replace(wording, '')

            data = df.values
            str_dict, er_title = _collect_dimensions(data)

            # NOTE(review): a missing dimension is formatted as None inside
            # the quoted "%s", i.e. stored as the text None. Kept unchanged
            # because readers of this table may rely on it -- confirm.
            values = [escape_string(xmmc)]
            for column in _HISTORY_TEXT_COLUMNS:
                text = str_dict.get(column)
                values.append(escape_string(text) if text else None)

            mysql.sql_change_msg(
                """insert into user_history_data_total (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys,sbdw,ssdq,ysnd) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s","%s","%s","%s")"""
                % tuple(values))

            gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title)
    finally:
        # Return the connection even when a workbook fails to import.
        mysql.release()
# Batch import: feed every file of the test directory through project_check.
if __name__ == "__main__":
    path = r"D:\dup_file_test"
    file_names = os.listdir(path)
    print(len(file_names))
    for file in file_names:
        # One single-entry list per file: (project id, workbook path, name).
        project_check([(0, os.path.join(path, file), "")])
        print("已存入************************************* %s" % file)
@@ -0,0 +1,577 @@ | |||
# coding=utf-8 | |||
import sys | |||
import re | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import cosin_similarity | |||
import pandas as pd | |||
import datetime | |||
import requests | |||
import os | |||
import pymysql | |||
# Dimension title as written in the workbook -> short column code.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys",
}

# Reverse lookup (column code -> dimension title). Derived from wdys1 so the
# two tables cannot drift apart; key order matches wdys1.
wdys2 = {code: title for title, code in wdys1.items()}

# Column code -> title for the two function-module fields.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述",
}
def getFlag():
    """Read the dimension weights from the weighting workbook.

    The second column of ``0825-丽水系统查重维度.xlsx`` is scanned for cells
    that name a dimension followed by a parenthesised percentage.
    NOTE(review): assumes cells look like ``<dimension title>(<n>%)`` with
    either ASCII or full-width parentheses -- confirm against the workbook.

    Returns:
        dict mapping column code (see ``wdys1``) -> weight text such as
        ``"10%"``. Cells that do not match, or whose title is not a key of
        ``wdys1``, are ignored.
    """
    # Group 1: text before the opening parenthesis; group 2: the percentage.
    weight_pattern = re.compile(r"(.*?)[(（](.*?%)[)）]")

    df = pd.read_excel("0825-丽水系统查重维度.xlsx")
    cells = pd.Series(df.values[:, 1]).dropna()

    data_dict = {}
    for cell in cells:
        match = weight_pattern.search(str(cell))
        if match is None:
            continue
        wdc = wdys1.get(match.group(1).strip())
        if wdc:
            data_dict[wdc] = match.group(2)
    return data_dict
def _now_text():
    """Return the current local time as ``YYYY-MM-DD HH:MM:SS``."""
    # strftime instead of slicing str(now): the slice cut into the seconds
    # whenever the microsecond part happened to be zero.
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")


# (field key, multiplier applied to the raw cosine score, label stored in the
# `dimension` column) for the two compared module fields.
_MODULE_DIMENSIONS = (
    ("gnmc", 1, "功能名称"),
    ("gnms", 99, "功能模块描述"),
)


def gong_neng_mo_kuai(mysql, dl, data, er_title):
    """Store a project's function modules and compare them with history.

    Steps:
      1. Concatenate the description fragments of every module named in
         *er_title* and insert one ``idc_project_module`` row per module.
      2. For every module row of the project and every row of
         ``user_history_module_data``, insert an ``idc_project_module_check``
         row, one ``idc_project_module_check_detail`` row per comparable
         field, then write the summed weighted score back to the check row.
      3. Insert the module's own keywords into ``user_module_keywords``.

    Args:
        mysql: connection wrapper exposing ``sql_change_msg``,
            ``sql_select_many`` and ``cur.lastrowid``.
        dl: ``(project_id, file_path, project_name)`` of the project.
        data: sheet rows; column 1 holds the module name and column 3 the
            description fragment.
        er_title: set of module names to collect.
    """
    project_id = int(dl[0])
    project_name = escape_string(str(dl[2]))

    # Step 1: descriptions per module, in sheet order; empty cells skipped.
    str_dict = {}
    for d in data:
        module_name = d[1]
        if pd.isnull(module_name) or module_name not in er_title:
            continue
        fragment = "" if pd.isnull(d[3]) else str(d[3])
        str_dict[module_name] = str_dict.get(module_name, "") + fragment

    for k, v in str_dict.items():
        now = _now_text()
        # NOTE(review): check_duplicate_count is written as 1; the source had
        # a bare non-numeric token in this position -- confirm the value.
        mysql.sql_change_msg(
            """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, "%s", "%s", "%s", "%s", "模块")""" % (
                project_id, escape_string(str(k)), escape_string(v), now, now))

    module_id_list = mysql.sql_select_many(
        """select project_module_id, module_name, module_content from idc_project_module where project_id=%d""" % project_id)
    # The history table is not modified below, so it is read once.
    history_modules = mysql.sql_select_many("""select * from user_history_module_data""")

    for mil in module_id_list or ():
        i = {
            "project_module_id": mil.get("project_module_id"),
            "gnmc": mil.get("module_name"),
            "gnms": mil.get("module_content"),
        }

        # Step 2: compare this module with every history module.
        for gc in history_modules or ():
            now = _now_text()
            mysql.sql_change_msg(
                """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, "%s", "%s", "%s", "%s", "%s")"""
                % (i.get("project_module_id"), escape_string(str(gc.get("gnmc"))),
                   escape_string(str(gc.get("xmmc"))), "", now, now))
            dup_module_id = mysql.cur.lastrowid

            total_similarity = 0
            for key, weight, label in _MODULE_DIMENSIONS:
                content_x = gc.get(key)
                content_y = i.get(key)
                if not (content_x and content_y):
                    continue
                similarity, _, _ = cosin_similarity.CosineSimilarity(content_x, content_y).main()
                similarity = similarity * weight
                total_similarity += similarity
                now = _now_text()
                mysql.sql_change_msg(
                    """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s")"""
                    % (dup_module_id, project_name, escape_string(content_y), escape_string(content_x),
                       similarity, label, now, now))

            mysql.sql_change_msg(
                """update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
                    total_similarity, dup_module_id))

        # Step 3: keywords of the module's own name and description. The text
        # is compared with itself only to obtain main()'s keyword list.
        gnmk_gjc = {}
        for a in ("gnmc", "gnms"):
            text = i.get(a)
            if text:
                _, _, keywords = cosin_similarity.CosineSimilarity(text, text).main()
                gnmk_gjc[a] = keywords

        # A field without keywords is formatted as None (stored as text),
        # as before.
        mysql.sql_change_msg(
            """insert into user_module_keywords (xmmc, gnmc, gnms) value("%s", "%s", "%s")""" % (
                project_name,
                escape_string(str(gnmk_gjc.get("gnmc"))[1:-1]) if gnmk_gjc.get("gnmc") else None,
                escape_string(str(gnmk_gjc.get("gnms"))[1:-1]) if gnmk_gjc.get("gnms") else None))
def project_check(data_list): | |||
mysql = mysql_pool.ConnMysql() | |||
# mysql.sql_select_many("""select * from mkgjc""") | |||
# 读取维度和权重 | |||
# xmnr_count = len(mysql.sql_select_many("""select * from xmnr_copy1""")) | |||
# gnmk_count = len(mysql.sql_select_many("""select * from gnmk_copy1""")) | |||
xmnr_count = len(mysql.sql_select_many("""select * from user_history_data""")) | |||
gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data""")) | |||
get_data_dict = getFlag() | |||
# 遍历excel存储路径 | |||
for dl in data_list: | |||
# path = "0825-丽水系统查重维度1.xlsx" | |||
# 读取路径下的excel | |||
print(dl,dl[1]) | |||
df = pd.read_excel(dl[1]) | |||
data = df.values | |||
# 将excel文件中的所有维度内容进行拼接 | |||
join_str = "" | |||
str_dict = {} | |||
title = "" | |||
er_title = set() | |||
for d in data: | |||
if pd.notnull(d[0]): | |||
title = d[0] | |||
if title == "功能模块": | |||
er_title.add(d[1]) | |||
join_str = "" | |||
for i in d[1:]: | |||
if pd.notnull(i): | |||
join_str += i | |||
str_dict[wdys1.get(title)] = join_str | |||
else: | |||
if title == "功能模块": | |||
er_title.add(d[1]) | |||
for i in d[1:]: | |||
if pd.notnull(i): | |||
join_str += i | |||
str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str | |||
print(str_dict) | |||
mysql.sql_change_msg( | |||
"""insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")""" | |||
% (dl[0], str_dict.get("xzwt") if str_dict.get("xzwt") else None, | |||
str_dict.get("xtjc") if str_dict.get("xtjc") else None, | |||
str_dict.get("xmmb") if str_dict.get("xmmb") else None, | |||
str_dict.get("yqjx") if str_dict.get("yqjx") else None, | |||
str_dict.get("jsxq") if str_dict.get("jsxq") else None, | |||
str_dict.get("sjxq") if str_dict.get("sjxq") else None, | |||
str_dict.get("aqxq") if str_dict.get("aqxq") else None, | |||
str_dict.get("ywly") if str_dict.get("ywly") else None, | |||
str_dict.get("hxyw") if str_dict.get("hxyw") else None, | |||
str_dict.get("ywxq") if str_dict.get("ywxq") else None, | |||
str_dict.get("ywxt") if str_dict.get("ywxt") else None, | |||
str_dict.get("jscj") if str_dict.get("jscj") else None, | |||
str_dict.get("yhfw") if str_dict.get("yhfw") else None, | |||
str_dict.get("mbqt") if str_dict.get("mbqt") else None, | |||
str_dict.get("jsnr") if str_dict.get("jsnr") else None, | |||
str_dict.get("gnmk") if str_dict.get("gnmk") else None, | |||
str_dict.get("sjgx") if str_dict.get("sjgx") else None, | |||
str_dict.get("znys") if str_dict.get("znys") else None)) | |||
# 或取所有的xmnr_copy1 | |||
xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data""") | |||
# 对比xmnr_copy1和xmnr维度是否都有 | |||
if xmnr_copy1: | |||
for xc in xmnr_copy1: | |||
total_keywords = {} | |||
total_similarity = 0 | |||
dup_count = 0 | |||
# 保存相加后的相似度到idc_project_check | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")""" | |||
% (dl[0], xc.get("xmmc"), dl[1], "", "", "需求相似、业务相似", "历史项目", "", | |||
str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7])) | |||
dup_id = mysql.cur.lastrowid | |||
for x in list(xc.keys())[1:]: | |||
content_x = xc.get(x) | |||
content_y = str_dict.get(x) | |||
if content_x and content_y: | |||
if x == 'gnmk': | |||
continue | |||
elif x == 'jsnr': | |||
continue | |||
else: | |||
dup_count += 1 | |||
if xc.get('gnmk')==' ' and str_dict.get('gnmk')==' ': | |||
for x in list(xc.keys())[1:]: | |||
content_x = xc.get(x) | |||
content_y = str_dict.get(x) | |||
if content_x and content_y: | |||
if x == 'gnmk': | |||
# 匹配到历史数据,次数加1 | |||
# dup_count += dup_file_test | |||
# 循环遍历每一个维度 | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords_y = similarity.main() | |||
similarity = similarity * 0 | |||
#print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y) | |||
# 相似度相加 | |||
total_similarity += similarity | |||
# 关键词收集 | |||
total_keywords[x] = keywords_y | |||
function_content = content_y | |||
dup_function_content = content_x | |||
for word_y in keywords_y: | |||
word_y = word_y.strip().strip("'").strip('"') | |||
function_content = str(function_content.replace("\"", "'")).replace(word_y, | |||
f'<span class="similarity">{word_y.strip()}</span>') | |||
for word_x in keywords_x: | |||
word_x = word_x.strip().strip("'").strip('"') | |||
dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x, | |||
f'<span class="similarity">{word_x.strip()}</span>') | |||
# 保存每个维度对应的相似度到idc_project_check_detail | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")""" | |||
% (dup_id, wdys2.get(x), similarity, escape_string(function_content), | |||
escape_string(dup_function_content), str(datetime.datetime.now())[:-7], | |||
str(datetime.datetime.now())[:-7])) | |||
# content = content.replace(gjcs, f'<span class="similarity">{gjcs.strip()}</span>') | |||
elif x == 'jsnr': | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords_y = similarity.main() | |||
similarity = similarity * 40 | |||
#print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y) | |||
# 相似度相加 | |||
total_similarity += similarity | |||
# 关键词收集 | |||
total_keywords[x] = keywords_y | |||
function_content = content_y | |||
dup_function_content = content_x | |||
for word_y in keywords_y: | |||
word_y = word_y.strip().strip("'").strip('"') | |||
function_content = str(function_content.replace("\"", "'")).replace(word_y, | |||
f'<span class="similarity">{word_y.strip()}</span>') | |||
for word_x in keywords_x: | |||
word_x = word_x.strip().strip("'").strip('"') | |||
dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x, | |||
f'<span class="similarity">{word_x.strip()}</span>') | |||
# 保存每个维度对应的相似度到idc_project_check_detail | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")""" | |||
% (dup_id, wdys2.get(x), similarity, escape_string(function_content), | |||
escape_string(dup_function_content), str(datetime.datetime.now())[:-7], | |||
str(datetime.datetime.now())[:-7])) | |||
else: | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords_y = similarity.main() | |||
similarity = similarity * (60 / dup_count) | |||
#print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y) | |||
# 相似度相加 | |||
total_similarity += similarity | |||
# 关键词收集 | |||
total_keywords[x] = keywords_y | |||
function_content = content_y | |||
dup_function_content = content_x | |||
for word_y in keywords_y: | |||
word_y = word_y.strip().strip("'").strip('"') | |||
function_content = str(function_content.replace("\"", "'")).replace(word_y, | |||
f'<span class="similarity">{word_y.strip()}</span>') | |||
for word_x in keywords_x: | |||
word_x = word_x.strip().strip("'").strip('"') | |||
dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x, | |||
f'<span class="similarity">{word_x.strip()}</span>') | |||
# 保存每个维度对应的相似度到idc_project_check_detail | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")""" | |||
% (dup_id, wdys2.get(x), similarity, escape_string(function_content), | |||
escape_string(dup_function_content), str(datetime.datetime.now())[:-7], | |||
str(datetime.datetime.now())[:-7])) | |||
elif xc['jsnr'] == ' ' and str_dict['jsnr'] == ' ': | |||
for x in list(xc.keys())[1:]: | |||
content_x = xc.get(x) | |||
content_y = str_dict.get(x) | |||
if content_x and content_y: | |||
if x == 'gnmk': | |||
# 匹配到历史数据,次数加1 | |||
# dup_count += dup_file_test | |||
# 循环遍历每一个维度 | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords_y = similarity.main() | |||
similarity = similarity * 50 | |||
#print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y) | |||
# 相似度相加 | |||
total_similarity += similarity | |||
# 关键词收集 | |||
total_keywords[x] = keywords_y | |||
function_content = content_y | |||
dup_function_content = content_x | |||
for word_y in keywords_y: | |||
word_y = word_y.strip().strip("'").strip('"') | |||
function_content = str(function_content.replace("\"", "'")).replace(word_y, | |||
f'<span class="similarity">{word_y.strip()}</span>') | |||
for word_x in keywords_x: | |||
word_x = word_x.strip().strip("'").strip('"') | |||
dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x, | |||
f'<span class="similarity">{word_x.strip()}</span>') | |||
# 保存每个维度对应的相似度到idc_project_check_detail | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")""" | |||
% (dup_id, wdys2.get(x), similarity, escape_string(function_content), | |||
escape_string(dup_function_content), str(datetime.datetime.now())[:-7], | |||
str(datetime.datetime.now())[:-7])) | |||
# content = content.replace(gjcs, f'<span class="similarity">{gjcs.strip()}</span>') | |||
elif x == 'jsnr': | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords_y = similarity.main() | |||
similarity = similarity * 0 | |||
#print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y) | |||
# 相似度相加 | |||
total_similarity += similarity | |||
# 关键词收集 | |||
total_keywords[x] = keywords_y | |||
function_content = content_y | |||
dup_function_content = content_x | |||
for word_y in keywords_y: | |||
word_y = word_y.strip().strip("'").strip('"') | |||
function_content = str(function_content.replace("\"", "'")).replace(word_y, | |||
f'<span class="similarity">{word_y.strip()}</span>') | |||
for word_x in keywords_x: | |||
word_x = word_x.strip().strip("'").strip('"') | |||
dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x, | |||
f'<span class="similarity">{word_x.strip()}</span>') | |||
# 保存每个维度对应的相似度到idc_project_check_detail | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")""" | |||
% (dup_id, wdys2.get(x), similarity, escape_string(function_content), | |||
escape_string(dup_function_content), str(datetime.datetime.now())[:-7], | |||
str(datetime.datetime.now())[:-7])) | |||
else: | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords_y = similarity.main() | |||
similarity = similarity * (50 / dup_count) | |||
#print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y) | |||
# 相似度相加 | |||
total_similarity += similarity | |||
# 关键词收集 | |||
total_keywords[x] = keywords_y | |||
function_content = content_y | |||
dup_function_content = content_x | |||
for word_y in keywords_y: | |||
word_y = word_y.strip().strip("'").strip('"') | |||
function_content = str(function_content.replace("\"", "'")).replace(word_y, | |||
f'<span class="similarity">{word_y.strip()}</span>') | |||
for word_x in keywords_x: | |||
word_x = word_x.strip().strip("'").strip('"') | |||
dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x, | |||
f'<span class="similarity">{word_x.strip()}</span>') | |||
# 保存每个维度对应的相似度到idc_project_check_detail | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")""" | |||
% (dup_id, wdys2.get(x), similarity, escape_string(function_content), | |||
escape_string(dup_function_content), str(datetime.datetime.now())[:-7], | |||
str(datetime.datetime.now())[:-7])) | |||
else: | |||
for x in list(xc.keys())[1:]: | |||
content_x = xc.get(x) | |||
content_y = str_dict.get(x) | |||
if content_x and content_y: | |||
if x == 'gnmk': | |||
# 匹配到历史数据,次数加1 | |||
# dup_count += dup_file_test | |||
# 循环遍历每一个维度 | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords_y = similarity.main() | |||
similarity = similarity * 50 | |||
#print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y) | |||
# 相似度相加 | |||
total_similarity += similarity | |||
# 关键词收集 | |||
total_keywords[x] = keywords_y | |||
function_content = content_y | |||
dup_function_content = content_x | |||
for word_y in keywords_y: | |||
word_y = word_y.strip().strip("'").strip('"') | |||
function_content = str(function_content.replace("\"", "'")).replace(word_y, | |||
f'<span class="similarity">{word_y.strip()}</span>') | |||
for word_x in keywords_x: | |||
word_x = word_x.strip().strip("'").strip('"') | |||
dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x, | |||
f'<span class="similarity">{word_x.strip()}</span>') | |||
# 保存每个维度对应的相似度到idc_project_check_detail | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")""" | |||
% (dup_id, wdys2.get(x), similarity, escape_string(function_content), | |||
escape_string(dup_function_content), str(datetime.datetime.now())[:-7], | |||
str(datetime.datetime.now())[:-7])) | |||
# content = content.replace(gjcs, f'<span class="similarity">{gjcs.strip()}</span>') | |||
elif x == 'jsnr': | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords_y = similarity.main() | |||
similarity = similarity * 40 | |||
#print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y) | |||
# 相似度相加 | |||
total_similarity += similarity | |||
# 关键词收集 | |||
total_keywords[x] = keywords_y | |||
function_content = content_y | |||
dup_function_content = content_x | |||
for word_y in keywords_y: | |||
word_y = word_y.strip().strip("'").strip('"') | |||
function_content = str(function_content.replace("\"", "'")).replace(word_y, | |||
f'<span class="similarity">{word_y.strip()}</span>') | |||
for word_x in keywords_x: | |||
word_x = word_x.strip().strip("'").strip('"') | |||
dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x, | |||
f'<span class="similarity">{word_x.strip()}</span>') | |||
# 保存每个维度对应的相似度到idc_project_check_detail | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")""" | |||
% (dup_id, wdys2.get(x), similarity, escape_string(function_content), | |||
escape_string(dup_function_content), str(datetime.datetime.now())[:-7], | |||
str(datetime.datetime.now())[:-7])) | |||
else: | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords_y = similarity.main() | |||
similarity = similarity * (10 / dup_count) | |||
#print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y) | |||
# 相似度相加 | |||
total_similarity += similarity | |||
# 关键词收集 | |||
total_keywords[x] = keywords_y | |||
function_content = content_y | |||
dup_function_content = content_x | |||
for word_y in keywords_y: | |||
word_y = word_y.strip().strip("'").strip('"') | |||
function_content = str(function_content.replace("\"", "'")).replace(word_y, | |||
f'<span class="similarity">{word_y.strip()}</span>') | |||
for word_x in keywords_x: | |||
word_x = word_x.strip().strip("'").strip('"') | |||
dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x, | |||
f'<span class="similarity">{word_x.strip()}</span>') | |||
# 保存每个维度对应的相似度到idc_project_check_detail | |||
mysql.sql_change_msg( | |||
"""insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")""" | |||
% (dup_id, wdys2.get(x), similarity, escape_string(function_content), | |||
escape_string(dup_function_content), str(datetime.datetime.now())[:-7], | |||
str(datetime.datetime.now())[:-7])) | |||
mysql.sql_change_msg( | |||
"""update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id)) | |||
project_gjc = {} | |||
for w in wdys2.keys(): | |||
content_x = str_dict.get(w) | |||
content_y = str_dict.get(w) | |||
if content_x and content_y: | |||
# 循环遍历每一个维度 | |||
similarity = cosin_similarity.CosineSimilarity(content_x, content_y) | |||
# 相似度 关键词 | |||
similarity, keywords_x, keywords = similarity.main() | |||
project_gjc[w] = keywords | |||
mysql.sql_change_msg( | |||
"""insert into user_keyword (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")""" | |||
% (dl[0], str(project_gjc.get("xzwt"))[1:-1] if project_gjc.get("xzwt") else None, | |||
str(project_gjc.get("xtjc"))[1:-1] if project_gjc.get("xtjc") else None, | |||
str(project_gjc.get("xmmb"))[1:-1] if project_gjc.get("xmmb") else None, | |||
str(project_gjc.get("yqjx"))[1:-1] if project_gjc.get("yqjx") else None, | |||
str(project_gjc.get("jsxq"))[1:-1] if project_gjc.get("jsxq") else None, | |||
str(project_gjc.get("sjxq"))[1:-1] if project_gjc.get("sjxq") else None, | |||
str(project_gjc.get("aqxq"))[1:-1] if project_gjc.get("aqxq") else None, | |||
str(project_gjc.get("ywly"))[1:-1] if project_gjc.get("ywly") else None, | |||
str(project_gjc.get("hxyw"))[1:-1] if project_gjc.get("hxyw") else None, | |||
str(project_gjc.get("ywxq"))[1:-1] if project_gjc.get("ywxq") else None, | |||
str(project_gjc.get("ywxt"))[1:-1] if project_gjc.get("ywxt") else None, | |||
str(project_gjc.get("jscj"))[1:-1] if project_gjc.get("jscj") else None, | |||
str(project_gjc.get("yhfw"))[1:-1] if project_gjc.get("yhfw") else None, | |||
str(project_gjc.get("mbqt"))[1:-1] if project_gjc.get("mbqt") else None, | |||
str(project_gjc.get("jsnr"))[1:-1] if project_gjc.get("jsnr") else None, | |||
str(project_gjc.get("gnmk"))[1:-1] if project_gjc.get("gnmk") else None, | |||
str(project_gjc.get("sjgx"))[1:-1] if project_gjc.get("sjgx") else None, | |||
str(project_gjc.get("znys"))[1:-1] if project_gjc.get("znys") else None)) | |||
mysql.sql_change_msg( | |||
"""update idc_project set dup_status=3, one_vote_veto_status=dup_file_test, self_check_status=dup_file_test, history_project_count=%d ,module_count=%d where project_id=%d""" % ( | |||
xmnr_count, gnmk_count, dl[0])) | |||
gong_neng_mo_kuai(mysql, dl, data, er_title) | |||
if __name__ == "__main__":
    # Alternative input source, kept for reference: ask the local service for
    # the pending entries and build the same (project_id, file_path,
    # project_name) tuples from its response.
    # all_path = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 15).json()
    # print(all_path)
    # data_list = []
    # for ap in all_path.get("data"):
    #     # if os.path.exists(ap.get("file_path")):
    #     data_list.append((ap.get("project_id"), ap.get("file_path"), ap.get("project_name")))
    # print(data_list)

    # Ad-hoc local run against a single hard-coded workbook.
    sample_entry = (
        11,
        r"C:\Users\HUAWEI\PycharmProjects\nlp\dup_check\0825-丽水系统查重维度1.xlsx",
        "水路运输综合监管系统建设项目.xls",
    )
    data_list = [sample_entry]
    project_check(data_list)
""" | |||
""" |
@@ -0,0 +1,391 @@ | |||
# coding=utf-8 | |||
import sys | |||
import re | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import cosin_similarity | |||
import pandas as pd | |||
import datetime | |||
import requests | |||
import os | |||
# (sheet label, column code) pairs for every project dimension, in the column
# order used by the SQL statements in this module.
_DIMENSION_PAIRS = (
    ("项目名称", "xmmc"),
    ("现状问题", "xzwt"),
    ("系统基础", "xtjc"),
    ("项目目标", "xmmb"),
    ("预期绩效", "yqjx"),
    ("建设需求", "jsxq"),
    ("数据需求", "sjxq"),
    ("安全需求", "aqxq"),
    ("业务领域", "ywly"),
    ("核心业务", "hxyw"),
    ("业务需求", "ywxq"),
    ("业务协同", "ywxt"),
    ("建设层级", "jscj"),
    ("用户范围", "yhfw"),
    ("目标群体", "mbqt"),
    ("建设内容", "jsnr"),
    ("功能模块", "gnmk"),
    ("数据共享", "sjgx"),
    ("智能要素", "znys"),
)

# Sheet label -> column code.
wdys1 = dict(_DIMENSION_PAIRS)

# Column code -> sheet label (the inverse of wdys1, same ordering).
wdys2 = {code: label for label, code in _DIMENSION_PAIRS}

# Column code -> label for the two function-module fields.
gnmkys = dict(gnmc="功能名称", gnms="功能描述")
def getFlag():
    """Read the dimension/weight workbook and map dimension codes to weights.

    Loads ``0825-丽水系统查重维度.xlsx`` (relative to the working directory),
    takes the non-null cells of its second column and, for every cell whose
    captured label is a key of ``wdys1``, stores the captured weight text
    under the matching column code.

    Returns:
        dict: column code -> weight text captured from the cell.
    """
    # Compiled once; the pattern text is unchanged.
    # NOTE(review): with ASCII parentheses, group(1) of the first pattern is
    # always the empty string (the lazy group matches nothing), so no label is
    # ever found in wdys1 and the result stays empty. The patterns presumably
    # used full-width parentheses originally - confirm against the workbook.
    label_pattern = re.compile("(.*?)(.*?%)")
    weight_pattern = re.compile(".*?((.*?%))")

    data_dict = {}
    df = pd.read_excel("0825-丽水系统查重维度.xlsx")
    cells = list(pd.Series(df.values[:, 1]).dropna())
    for cell in cells:
        try:
            wd = label_pattern.search(cell).group(1).strip()
            wdc = wdys1.get(wd)
            if wdc:
                data_dict[wdc] = weight_pattern.search(cell).group(1)
        except (TypeError, AttributeError):
            # TypeError: the cell is not text; AttributeError: no "...%" part
            # was found. Such cells carry no weight and are skipped, while any
            # other error is no longer hidden.
            continue
    return data_dict
def gong_neng_mo_kuai(mysql, dl, data, er_title):
    """Store the project's function modules and compare them with history.

    Steps, in order:

    1. Concatenate column 3 of every ``data`` row whose column 1 equals a
       title in ``er_title`` and insert one ``idc_project_module`` row per
       title.
    2. Re-read all modules of the project and compare each of them with every
       row of ``user_history_module_data``: the module name (``gnmc``) is
       weighted 1 and the description (``gnms``) 99. Per-field results go to
       ``idc_project_module_check_detail``, the sum to
       ``idc_project_module_check``.
    3. Extract the keywords of each module and insert them into
       ``user_module_keywords``.

    Args:
        mysql: object offering ``sql_change_msg``, ``sql_select_many`` and a
            ``cur`` cursor (``project_check`` passes a
            ``mysql_pool.ConnMysql``).
        dl: project entry; ``dl[0]`` is used as the project id and ``dl[2]``
            as the project name.
        data: rows of the project sheet.
        er_title: iterable of module titles found in the sheet.
    """

    def _now():
        # strftime always yields "YYYY-MM-DD HH:MM:SS"; the previous
        # str(now)[:-7] cut into the time whenever microsecond was 0.
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def _q(value):
        # Every text value is escaped before it is placed inside the
        # double-quoted SQL literal, so quotes/backslashes cannot break it.
        return escape_string(str(value))

    # (field, weight, dimension label written to the detail table)
    compared_fields = (
        ("gnmc", 1, "功能名称"),
        ("gnms", 99, "功能模块描述"),
    )

    # Concatenate the description cells of each module title.
    str_dict = {}
    for et in er_title:
        for d in data:
            if d[1] == et:
                if str_dict.get(et):
                    str_dict[et] = str_dict.get(et) + d[3]
                else:
                    str_dict[et] = d[3]

    for k, v in str_dict.items():
        now = _now()
        mysql.sql_change_msg(
            """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, "%s", "%s", "%s", "%s", "模块")""" % (
                int(dl[0]), _q(k), _q(v), now, now))

    module_id_list = mysql.sql_select_many(
        """select project_module_id, module_name, module_content from idc_project_module where project_id=%d""" % dl[
            0])
    data_list = [
        {
            "project_module_id": mil.get("project_module_id"),
            "gnmc": mil.get("module_name"),
            "gnms": mil.get("module_content"),
        }
        for mil in module_id_list
    ]

    # This function never writes to the history table, so it is read once
    # instead of once per module.
    gnmk_copy1 = mysql.sql_select_many("""select * from user_history_module_data""")

    for i in data_list:
        for gc in gnmk_copy1 or ():
            now = _now()
            mysql.sql_change_msg(
                """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, "%s", "%s", "%s", "%s", "%s")"""
                % (i.get("project_module_id"), _q(gc.get("gnmc")), _q(gc.get("xmmc")), "", now, now))
            dup_module_id = mysql.cur.lastrowid

            total_similarity = 0
            for j, weight, dimension in compared_fields:
                content_x = gc.get(j)
                content_y = i.get(j)
                if not (content_x and content_y):
                    continue
                similarity, keyword_x, keywords = cosin_similarity.CosineSimilarity(content_x, content_y).main()
                similarity = similarity * weight
                print("######################相似度: %.2f%%" % similarity, "关键词: %s" % keywords)
                total_similarity += similarity
                now = _now()
                mysql.sql_change_msg(
                    """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s")"""
                    % (dup_module_id, _q(dl[2]), _q(content_y), _q(content_x), similarity,
                       dimension, now, now))

            mysql.sql_change_msg("""update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
                total_similarity, dup_module_id))

        # Keyword extraction: the text is compared with itself only to obtain
        # the keyword list that main() returns as its third value.
        gnmk_gjc = {}
        for a in ("gnmc", "gnms"):
            text = i.get(a)
            if text:
                similarity, keyword_x, keywords = cosin_similarity.CosineSimilarity(text, text).main()
                gnmk_gjc[a] = keywords

        name_words = gnmk_gjc.get("gnmc")
        desc_words = gnmk_gjc.get("gnms")
        # str(list)[1:-1] keeps the "'a', 'b'" rendering used before; a missing
        # keyword list is still formatted as None.
        mysql.sql_change_msg("""insert into user_module_keywords (xmmc, gnmc, gnms) value("%s", "%s", "%s")""" % (
            _q(dl[2]),
            _q(str(name_words)[1:-1]) if name_words else None,
            _q(str(desc_words)[1:-1]) if desc_words else None))
def _sql_now():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    # str(datetime.now())[:-7] cut into the time whenever microsecond was 0.
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def _sql_text(value):
    """Escape ``str(value)`` for use inside a quoted MySQL string literal."""
    return escape_string(str(value))


def _highlight_keywords(text, keywords):
    """Wrap each keyword occurrence in ``text`` in a ``similarity`` span."""
    for raw_word in keywords:
        word = raw_word.strip().strip("'").strip('"')
        # Double quotes in the text are turned into single quotes before
        # every replacement, exactly as before.
        text = str(text.replace("\"", "'"))
        if not word:
            # str.replace("", ...) would insert the span between all characters.
            continue
        text = text.replace(word, f'<span class="similarity">{word.strip()}</span>')
    return text


def _collect_dimension_text(data):
    """Concatenate the sheet rows per dimension.

    A row with a value in column 0 starts a new dimension; rows without one
    continue the current dimension.

    Returns:
        tuple: ``(str_dict, er_title)`` - column code -> concatenated text,
        and the set of column-1 values seen under the "功能模块" dimension.
    """
    str_dict = {}
    er_title = set()
    title = ""
    for row in data:
        starts_dimension = pd.notnull(row[0])
        if starts_dimension:
            title = row[0]
        if title == "功能模块":
            er_title.add(row[1])
        # str() lets numeric cells take part in the concatenation.
        row_text = "".join(str(cell) for cell in row[1:] if pd.notnull(cell))
        key = wdys1.get(title)
        if starts_dimension:
            str_dict[key] = row_text
        else:
            # Only this row's text is appended. Previously the whole
            # accumulated buffer was appended again, repeating earlier rows.
            str_dict[key] = (str_dict.get(key) or "") + row_text
    return str_dict, er_title


def project_check(data_list):
    """Run the duplicate check for every project entry in ``data_list``.

    For each entry the workbook at ``dl[1]`` is read, its dimension texts are
    stored in ``user_data`` and compared with every row of
    ``user_history_data``. The total score of one comparison is made of 50
    for ``gnmk``, 40 for ``jsnr`` and 10 shared equally among all other
    dimensions present on both sides. Keywords are stored in
    ``user_keyword``, the project row in ``idc_project`` is updated, and the
    module-level check is delegated to ``gong_neng_mo_kuai``.

    Args:
        data_list: iterable of entries where ``dl[0]`` is the project id,
            ``dl[1]`` the workbook path and ``dl[2]`` the project name.
    """
    mysql = mysql_pool.ConnMysql()
    xmnr_count = len(mysql.sql_select_many("""select * from user_history_data"""))
    gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data"""))
    # The weights are not used below; the call is retained so that a missing
    # weight workbook still fails here as it did before.
    getFlag()

    # Column order of the user_data / user_keyword inserts after `xmmc`.
    dimension_columns = (
        "xzwt", "xtjc", "xmmb", "yqjx", "jsxq", "sjxq", "aqxq", "ywly", "hxyw",
        "ywxq", "ywxt", "jscj", "yhfw", "mbqt", "jsnr", "gnmk", "sjgx", "znys",
    )

    for dl in data_list:
        df = pd.read_excel(dl[1])
        data = df.values
        str_dict, er_title = _collect_dimension_text(data)

        # Text values are escaped so quotes in the sheet cannot break the
        # statement; empty dimensions are still formatted as None.
        user_values = [dl[0]]
        for column in dimension_columns:
            text = str_dict.get(column)
            user_values.append(_sql_text(text) if text else None)
        mysql.sql_change_msg(
            """insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % tuple(user_values))

        xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data""")
        for xc in xmnr_copy1 or ():
            now = _sql_now()
            # Escaping dl[1] keeps the backslashes of Windows paths intact.
            mysql.sql_change_msg(
                """insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
                % (dl[0], _sql_text(xc.get("xmmc")), _sql_text(dl[1]), "", "", "需求相似、业务相似", "历史项目", "",
                   now, now))
            dup_id = mysql.cur.lastrowid

            # Dimensions (first column of the history row skipped) that have
            # content on both sides.
            shared = []
            for x in list(xc.keys())[1:]:
                content_x = xc.get(x)
                content_y = str_dict.get(x)
                if content_x and content_y:
                    shared.append((x, content_x, content_y))
            # Number of "ordinary" dimensions that share the 10-point budget.
            dup_count = sum(1 for x, _, _ in shared if x not in ("gnmk", "jsnr"))

            total_similarity = 0
            for x, content_x, content_y in shared:
                if x == 'gnmk':
                    weight = 50
                elif x == 'jsnr':
                    weight = 40
                else:
                    # dup_count >= 1 here because x itself was counted.
                    weight = 10 / dup_count
                similarity, keywords_x, keywords_y = cosin_similarity.CosineSimilarity(content_x, content_y).main()
                similarity = similarity * weight
                print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
                total_similarity += similarity

                function_content = _highlight_keywords(content_y, keywords_y)
                dup_function_content = _highlight_keywords(content_x, keywords_x)
                now = _sql_now()
                mysql.sql_change_msg(
                    """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                    % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                       escape_string(dup_function_content), now, now))

            mysql.sql_change_msg(
                """update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id))

        # Keyword extraction: each text is compared with itself only to get
        # the keyword list that main() returns as its third value.
        project_gjc = {}
        for w in wdys2.keys():
            text = str_dict.get(w)
            if text:
                similarity, keywords_x, keywords = cosin_similarity.CosineSimilarity(text, text).main()
                project_gjc[w] = keywords

        keyword_values = [dl[0]]
        for column in dimension_columns:
            words = project_gjc.get(column)
            # str(list)[1:-1] keeps the "'a', 'b'" rendering used before.
            keyword_values.append(_sql_text(str(words)[1:-1]) if words else None)
        mysql.sql_change_msg(
            """insert into user_keyword (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % tuple(keyword_values))

        mysql.sql_change_msg(
            """update idc_project set dup_status=3, one_vote_veto_status=1, self_check_status=1, history_project_count=%d ,module_count=%d where project_id=%d""" % (
                xmnr_count, gnmk_count, dl[0]))
        gong_neng_mo_kuai(mysql, dl, data, er_title)
if __name__ == "__main__":
    # Ask the local service for the pending duplicate-check entries.
    # The timeout keeps the script from blocking forever when the service
    # does not answer.
    all_path = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 15, timeout=30).json()
    # print(all_path)
    #
    # data_list = []
    # for ap in all_path.get("data"):
    #     # if os.path.exists(ap.get("file_path")):
    #     data_list.append((ap.get("project_id"), ap.get("file_path"), ap.get("project_name")))
    # print(data_list)
    # data_list = [(11, r"D:\pythonDM\Ndkj\duplicate_check\0825-丽水系统查重维度1.xlsx", "数字百山祖(一期)—“云值守”建设方案")]
    # project_check(data_list)
@@ -0,0 +1,113 @@ | |||
# coding=utf-8 | |||
import pymysql | |||
from dbutils.pooled_db import PooledDB | |||
# from dbutils.persistent_db import PersistentDB | |||
import os

# MySQL connection settings used by ConnMysql.
# Each value can be overridden through the environment; the previous
# hard-coded values remain the fallback so existing deployments keep working.
# SECURITY: the fallback exposes root credentials in source control - rotate
# the password and supply it through IDC_MYSQL_PASSWORD instead.
mysqlInfo = {
    "host": os.environ.get("IDC_MYSQL_HOST", '47.98.125.47'),
    "user": os.environ.get("IDC_MYSQL_USER", 'root'),
    "passwd": os.environ.get("IDC_MYSQL_PASSWORD", 'NingdaKeji123!'),
    "db": os.environ.get("IDC_MYSQL_DB", 'idc'),
    # The port must stay an integer for the driver.
    "port": int(os.environ.get("IDC_MYSQL_PORT", 3306)),
    "charset": os.environ.get("IDC_MYSQL_CHARSET", "utf8"),
}
class ConnMysql(object):
    """Small helper holding one pooled MySQL connection and a dict cursor.

    Attributes:
        coon: connection obtained from the shared pool.
        cur: cursor created with ``pymysql.cursors.DictCursor``.
    """

    # Shared connection pool, created on first use.
    __pool = None

    def __init__(self):
        # Take a connection from the pool and open a dict cursor on it.
        self.coon = ConnMysql._get_mysql_conn()
        self.cur = self.coon.cursor(cursor=pymysql.cursors.DictCursor)

    @staticmethod
    def _get_mysql_conn():
        """Return a connection from the shared pool, creating the pool once."""
        # The pool is stored on the class. It used to be assigned to a module
        # global while this check read the class attribute, so the attribute
        # stayed None and a new pool was built for every instance.
        if ConnMysql.__pool is None:
            ConnMysql.__pool = PooledDB(
                creator=pymysql,
                mincached=1,
                maxcached=5,
                maxconnections=6,
                maxshared=3,
                blocking=True,
                maxusage=None,
                setsession=[],
                ping=2,
                host=mysqlInfo['host'],
                user=mysqlInfo['user'],
                passwd=mysqlInfo['passwd'],
                db=mysqlInfo['db'],
                port=mysqlInfo['port'],
                charset=mysqlInfo['charset'])
        return ConnMysql.__pool.connection()

    def _execute(self, sql, args):
        # Without args the statement is executed exactly as before.
        if args is None:
            return self.cur.execute(sql)
        return self.cur.execute(sql, args)

    def sql_change_msg(self, sql, args=None):
        """Execute an insert/update/delete statement and commit it.

        Args:
            sql: statement text.
            args: optional parameters handed to the cursor, which lets callers
                use driver-side parameter binding instead of string building.

        Returns:
            Whatever the cursor's ``execute`` returns.

        Raises:
            Any driver error; the transaction is rolled back first.
        """
        try:
            change_sql = self._execute(sql, args)
            self.coon.commit()
        except Exception:
            # Do not leave a half-applied transaction on a pooled connection.
            self.coon.rollback()
            raise
        return change_sql

    def sql_select_one(self, sql, args=None):
        """Execute a query and return its first row."""
        self._execute(sql, args)
        return self.cur.fetchone()

    def sql_select_many(self, sql, count=None, args=None):
        """Execute a query and return all rows, or at most ``count`` rows."""
        self._execute(sql, args)
        if count is None:
            return self.cur.fetchall()
        return self.cur.fetchmany(count)

    def release(self):
        """Close the cursor, then hand the connection back."""
        # The cursor depends on the connection, so it is closed first.
        self.cur.close()
        self.coon.close()
if __name__ == '__main__':
    # Tables listed in the `idc` schema by an earlier "show tables;" run
    # (reference only): gjc, gjc2, idc_dept, idc_project, idc_project_check,
    # idc_project_check_detail, idc_project_module, idc_project_module_check,
    # idc_project_module_check_detail, idc_user, idc_user_dept, mk2
    # print(ConnMysql().sql_select_many("show tables;"))
    mysql = ConnMysql()
    try:
        # mysql.sql_change_msg("""insert into idc_project (project_name,file_path) value ("%s", "%s")""" % ("森林火险", "/opt/idc/file/20220924/79a53829-8965-4aof-a342-c532f6c9c2a3森林火险.xlsx"))
        # print(mysql.sql_select_many("""select * from gjc"""))
        # print(mysql.sql_select_many("""select * from gjc2 where id=dup_file_test"""))
        # print(mysql.sql_select_many("""select * from xmnr"""))
        # print(mysql.sql_select_many("""select * from gjc_copy1"""))
        # print(mysql.sql_select_one("""select * from idc_project_check"""))
        # print(mysql.sql_select_one("""select * from idc_project_check_detail"""))
        # print(mysql.sql_select_many("""select * from idc_project_module"""))
        # print(mysql.sql_select_many("""select * from idc_project_module where project_id=%d""" % int(7)))
        # print( mysql.sql_select_one("""select dup_id from idc_project_check where project_id=%d"""% int(7)))
        # print(len(mysql.sql_select_many("""select * from xmnr_copy1""")))
        # print(len(mysql.sql_select_many("""select * from user_history_data""")))

        # Print the number of rows in the history table.
        print(len(mysql.sql_select_many("""select * from user_history_data""")))
    finally:
        # Hand the connection back even when the query fails.
        mysql.release()
    # Query for finding duplicated module descriptions:
    #   select * from user_history_module_data where gnms in (select gnms from user_history_module_data group by gnms having count(gnms)>1);
# print() | |||
# str_dict={} | |||
# cmnr_count=551 | |||
# gnmkcount=1192 | |||
# | |||
# print(mysql.sql_change_msg( | |||
# """update idc_project set company_name=%s, dup_status=3, one_vote_veto_status=dup_file_test, self_check_status=dup_file_test, history_project_count=%d ,module_count=%d where project_id=%d""" % ( | |||
# str_dict.get('sbdw'), xmnr_count=551, gnmk_count=1192, 104))) | |||
# print(mysql.sql_change_msg( | |||
# """update idc_project set dup_status=3, one_vote_veto_status=dup_file_test, self_check_status=dup_file_test, history_project_count=%d ,module_count=%d where project_id=%d""" % ( | |||
# ) | |||
# for k, v in mysql.sql_select_one("""select * from idc_project_check_detail""").items(): | |||
# print(k, v) |
@@ -0,0 +1,41 @@ | |||
certifi==2022.6.15 | |||
cffi==1.15.1 | |||
chardet==5.0.0 | |||
charset-normalizer==2.0.12 | |||
click==8.0.4 | |||
colorama==0.4.5 | |||
cryptography==3.4.7 | |||
dataclasses==0.8 | |||
DBUtils==3.0.2 | |||
et-xmlfile==1.1.0 | |||
Flask==1.0.2 | |||
idna==3.3 | |||
importlib-metadata==4.8.3 | |||
itsdangerous==2.0.1 | |||
jieba==0.42.1 | |||
Jinja2==3.0.3 | |||
joblib==1.1.0 | |||
MarkupSafe==2.0.1 | |||
numpy==1.19.5 | |||
openpyxl==3.0.10 | |||
pandas==1.1.5 | |||
pdfminer.six==20211012 | |||
pdfplumber==0.6.0 | |||
Pillow==8.4.0 | |||
pycparser==2.21 | |||
PyMySQL==0.10.1 | |||
pypiwin32==223 | |||
python-dateutil==2.8.2 | |||
pytz==2022.2.1 | |||
pywin32==304 | |||
requests==2.27.1 | |||
scikit-learn==0.24.2 | |||
scipy==1.5.4 | |||
six==1.16.0 | |||
threadpoolctl==3.1.0 | |||
typing_extensions==4.1.1 | |||
urllib3==1.26.12 | |||
Wand==0.6.10 | |||
Werkzeug==2.0.3 | |||
xlrd==1.2.0 | |||
zipp==3.6.0 |