@@ -0,0 +1,78 @@ | |||
# 填充API Key与Secret Key | |||
import requests | |||
import json | |||
import pandas as pd | |||
import re | |||
# 获取百度token验证 | |||
def get_access_token():
    """Obtain a Baidu AIP OAuth access token for the hard-coded app credentials.

    Returns the ``access_token`` string from the token endpoint's JSON reply
    (None if the key is absent).
    """
    # NOTE(review): credentials are committed in source — move to config/env.
    app_key = "0nbZsMNAWGCU7rLp6olAXVUG"
    app_secret = "gWgVIEMpf85npswY0XahUncx6aZGa8e3"
    token_url = (
        "https://aip.baidubce.com/oauth/2.0/token"
        f"?client_id={app_key}&client_secret={app_secret}&grant_type=client_credentials"
    )
    request_headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    # The endpoint ignores the body; an empty JSON string is sent as before.
    reply = requests.request("POST", token_url, headers=request_headers, data=json.dumps(""))
    return reply.json().get("access_token")
# 使用百度文心一言获取文本相识度 | |||
def CallResult(prompt):
    """Ask Baidu ERNIE (wenxinworkshop) *prompt* and extract the first number.

    Returns ``(number, raw_reply)`` on success, ``(0, "查询结果错误")`` on a
    non-200 HTTP response, and ``(0, "")`` when no number appears in the reply.
    """
    # NOTE(review): hard-coded, expiring access token — should come from
    # get_access_token() instead of being pasted in.
    token = "24.b7829fa01d73e3a4187c73fe7e27316c.2592000.1696475561.282335-38769936"
    headers = {
        'Content-Type': 'application/json',
    }
    data = json.dumps({
        'temperature': 0.1,
        'top_p': 0.8,
        "penalty_score": 1.0,
        'messages': [
            {
                "role": "user",      # questioner role
                "content": prompt    # question content
            }
        ],
    })
    # Call the chat-completions API.
    response = requests.post(
        f"https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions?access_token={token}",
        headers=headers, data=data)
    if response.status_code != 200:
        return 0, "查询结果错误"
    resp = response.json()
    print(f"查重结果: {resp['result']}")
    # Pull the first integer/decimal out of the model's free-text answer.
    pattern = r"\d+\.?\d*"
    nums = re.findall(pattern, resp['result'])
    if len(nums) > 0:
        print("提取到的数字:", nums[0])
        # Bug fix: float() never returns None, so the old `if n is None`
        # check was dead code; a failed conversion raises ValueError instead.
        try:
            n = float(nums[0])
        except ValueError:
            return 0, ""
        return n, resp['result']
    return 0, ""
# 整理百度返回的格式 | |||
def format_data(prompt, result):
    """Placeholder formatter: truncates train.json by writing an empty string.

    *prompt* and *result* are currently unused.
    """
    with open('train.json', 'w') as file:
        file.write("")
if __name__ == '__main__':
    # Fetch a fresh OAuth token (printed only — CallResult uses its own
    # hard-coded token, see the NOTE there).
    access_token = get_access_token()
    print(access_token)
    # Ask ERNIE for the duplication percentage of the two passages below.
    result = CallResult("告诉我下面两段话的重复率百分比是多少:1. 城市总体态势主要从平安指数、生态环保、实有人口、重点人员、重点场所、防灾防控、宏观经济、城市管理、城市监测、事件统计的角度,展示丽水各项城市指标的运行情况。2. 为实现各模块的数据数量、数据接入、历史分析以及部门工作的情况,需要将各模块情况接入分析形成督办驾驶舱。数字化生成:根据市委市政府领导关心的驾驶舱数据生成和展示相关的运行指标,利用数据可视化技术,通过数据驾驶舱方式集中展示,让领导通过一屏,即可清晰掌握驾驶舱数据指标生成相关的实时情况。数字化生成相关的数据指标主要包括接入驾驶舱预警指标、优质指标、专题页面总数、数据指标总数、涉及部门总数、自动化率、涉及接口总数、采用数据直报的指标总数、数据直报完成率、延迟率、整改率、接口故障率、当前接口故障数场景应用相关统计、接入业务系统相关统计、等30余个统计维度指标。数字化督办:根据城市管理指挥中心工作人员针对每日会商工作推进过程中关心的相关指标,利用数据可视化技术,通过数据驾驶舱方式集中展示,通过数字化督办驾驶舱即可清晰掌握每日会商的工作成效和部门工作情况,方便城市管理指挥中心完善和优化每日会商工作。指标主要包括会商需关注指标数、指标批示数、事项批示数、交办情况、督办情况、部门应急预案相关统计、数字化督办相关指标、带班领导会商次数、议题数等不少于10个统计维度指标。")
    print("回答结果:", result)
    # Number extraction already happens inside CallResult; kept for reference.
    # pattern = r"\d+\.?\d*"
    # nums = re.findall(pattern, result)
    # if len(nums) > 0:
    #     print("提取到的数字:", nums[0])
@@ -0,0 +1,38 @@ | |||
import os
# Allow a duplicate OpenMP runtime to load (works around the libiomp5
# double-load abort when torch and MKL-backed libraries coexist).
os.environ['KMP_DUPLICATE_LIB_OK']='True'
# Approach 1: sentence embeddings via ModelScope.
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
# Alternative CoROM embedding models:
# damo/nlp_corom_sentence-embedding_chinese-base-ecom
# damo/nlp_corom_sentence-embedding_chinese-base
# damo/nlp_corom_sentence-embedding_chinese-tiny
model_id = "damo/nlp_corom_sentence-embedding_chinese-base-ecom"
# model_id = "damo/nlp_gpt3_text-generation_chinese-base"
pipeline_se = pipeline(Tasks.sentence_embedding, model=model_id, max_length=1024)
# When the input carries both "source_sentence" and "sentences_to_compare",
# the pipeline returns embeddings for the first source sentence and for each
# comparison sentence, plus the similarity score of every pair.
sentences_to_compare = [
    '''根据不同的季度、网红热点信息主动给用户推送热门景区和景点的游玩信息''',
    '''开发一个在线商品销售商城,提供旅游路线上的相关特产和旅游商品的查阅及下单功能''',
    '''推送相关旅游产品折扣信息以及景区景点举办的篝火晚会、烟花盛宴等活动内容,可通过线下报名参与。'''
]
inputs = {
    "source_sentence": [
        # '''模块功能:提供商家信息查询和种植区域查询等功能。功能描述:商家信息包括名称、地址、法人、联系方式、经营范围、投资规模等,种植区域主要是指丽水香茶种植区域的面积、位置、高程等信息,政府工作人员可根据多个查询条件完成上述信息的查询。'''
        '''展示乡村游中相关乡村举办的庆典活动,包含庆典举办时间、内容等'''
    ],
    "sentences_to_compare": sentences_to_compare
}
result = pipeline_se(input=inputs)
print(result["scores"])
# Pick the best-matching comparison sentence by score.
arr = result["scores"]
max_value = max(arr)
max_index = arr.index(max_value)
print("最大值:", max_value)
print("最相识内容:", sentences_to_compare[max_index])
@@ -0,0 +1,88 @@ | |||
import jieba | |||
import pandas as pd | |||
from sklearn.feature_extraction.text import TfidfVectorizer | |||
from sklearn.feature_extraction.text import TfidfTransformer | |||
from sklearn.feature_extraction.text import CountVectorizer | |||
from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances | |||
import math | |||
# Teach jieba the domain terms below so each is kept as a single token.
for _term in ('以太坊', '区块链', '数字货币', '将于', '人人网',
              '比特币', '北上广', '大数据', '云计算', '公有链'):
    jieba.suggest_freq(_term, True)
# Load the stop-word file (one word per line) and turn it into a list of
# byte strings; the handle is closed by the context manager.
stpwrdpath = "./stop_words.utf8"
with open(stpwrdpath, 'rb') as stpwrd_dic:
    stpwrd_content = stpwrd_dic.read()
stpwrdlst = stpwrd_content.splitlines()
# vector = TfidfVectorizer(stop_words=stpwrdlst)
def get_xls_data():
    """Pairwise TF-IDF cosine comparison of two hard-coded project texts.

    Prints every pair whose cosine similarity exceeds 0.7 and, at the end,
    the share of similar pairs as a percentage.
    """
    # The Excel-reading variant is kept for reference:
    # data = pd.read_excel("./0825-丽水系统查重维度.xlsx", names=["项目名称", "数字百山祖(一期)—“云值守”建设方案"], sheet_name='Sheet1')
    # content_ls_1 = [(x, y) for x, y in enumerate(data[0]["项目名称"]) if y]
    # content_ls_2 = [(x, y) for x, y in enumerate(data[0]["数字百山祖(一期)—“云值守”建设方案"]) if y]
    content_ls_1 = [("content", """通过本项目的实施,可以真实贯彻以人民为中心的发展思想,按 照政府办事“一件事”全流程“最多跑一次”的理念和目标,深化“四 张清单一张网”改革,从与群众和卫生健康监管对象关系最紧密的领 域和事项做起,充分运用“互联网+智慧监管”和大数据,促进卫生 健康领域风险监管创新,使群众和企业对综合行政执法改革的获得感 明显增强、综合行政执法效率明显提升、卫生健康领域环境进一步改 善,着力防范化解卫生健康领域重大风险,维护人民群众健康权益""")]
    content_ls_2 = [("content", """建成政府侧应用和企业侧应用,实现政府、工商联、商会、企业一体化协同应用,助力工商联全面摸排“浙江人经济”的底数,精准掌握省外浙商重点企业、产业、产业链以及省外浙江商会的情况,加强对在外浙商企业的日常联系和服务覆盖,以乡情为纽带,有效发挥在外浙商的产业优势、技术优势、市场优势、资源优势,抢抓国内大循环的制高点,推动产业链招商、精准靶向招商,开展政策实施情况第三方评估,促进浙江人经济与浙江经济融合发展,助力我省高质量发展建设共同富裕示范区。""")]
    # Pair up entries that share the same key.
    content_ls = []
    for x in content_ls_1:
        for y in content_ls_2:
            if x[0] == y[0]:
                content_ls.append((x[1], y[1]))
    print("语料长度:" + str(len(content_ls)))
    similarity_length = 0
    for x in content_ls:
        # (Cleanup: the unused per-iteration CountVectorizer/TfidfTransformer
        # allocations of the original were removed — they had no effect.)
        vector = TfidfVectorizer(max_df=10, min_df=1)
        tfidf = vector.fit_transform([get_jieba_doc(x[0]), get_jieba_doc(x[1])])
        new_cosine_similarity = cosine_similarity(tfidf).tolist()
        if new_cosine_similarity[0][1] > 0.7:
            print(cosine_similarity(tfidf))
            print("相似文本为:" + x[0]+" ||||| " + x[1])
            print("==================")
            similarity_length = similarity_length + 1
    print("相似语料长度:" + str(similarity_length))
    # Bug fix: the original applied `%` formatting first and then multiplied
    # the resulting *string* by 100 (printing it 100 times); the arithmetic
    # is now parenthesized so the percentage itself is computed.
    print("相似度识别成功率:%s" % (similarity_length / len(content_ls) * 100) + "%")
def get_jieba_doc(document):
    """Tokenise *document* with jieba and return the tokens joined by spaces."""
    document_cut = jieba.cut(document)
    try:
        return " ".join(document_cut)
    except Exception as e:
        # Bug fix: Python 3 exceptions have no `.message` attribute, so the
        # old `print(e.message)` raised AttributeError and masked the error.
        print(e)
def VectorCosine(x, y):
    """Cosine of the turning angle at interior points of the polyline (x, y).

    For each index i the chord vectors (x[i]-x[i-1], y[i]-y[i-1]) and
    (x[i+1]-x[i], y[i+1]-y[i]) are compared; one cosine per index is returned.

    NOTE(review): the loop stops at len(x)-2, so the last interior point is
    never evaluated — possibly an off-by-one; confirm the intended window.
    """
    cosines = []
    for idx in range(1, len(x) - 2):
        dx_prev = x[idx] - x[idx - 1]
        dx_next = x[idx + 1] - x[idx]
        dy_prev = y[idx] - y[idx - 1]
        dy_next = y[idx + 1] - y[idx]
        dot = dx_prev * dx_next + dy_prev * dy_next
        norms = math.sqrt(dx_prev ** 2 + dy_prev ** 2) * math.sqrt(dx_next ** 2 + dy_next ** 2)
        cosines.append(dot / norms)
    return cosines
if __name__ == '__main__':
    # Run the pairwise similarity check over the hard-coded sample texts.
    get_xls_data()
@@ -0,0 +1,129 @@ | |||
# coding=utf-8 | |||
import re | |||
import html | |||
import jieba | |||
import jieba.analyse | |||
from sklearn.feature_extraction.text import TfidfVectorizer | |||
from sklearn.metrics.pairwise import cosine_similarity | |||
# Load the stop-word list once at import time (one word per line).
# Bug fix: the file handle was never closed; `with` guarantees closure.
with open('stop_words.utf8', encoding='utf8') as stopwords:
    stopword_list = [k.strip() for k in stopwords.readlines() if k.strip() != '']
def replace_tongyici(keywords):
    """Map each keyword to the canonical head word of its synonym group.

    tongyici_tihuan.txt holds one synonym group per line; the first word of a
    line is the canonical form.  Words without a group are kept unchanged.
    (NOTE(review): the original comment claimed tab-separated groups, but the
    code splits on a single space — confirm the file format.)
    """
    # Build a synonym -> canonical-word lookup.
    # Bug fix: the file handle was previously left open; `with` closes it.
    combine_dict = {}
    with open("tongyici_tihuan.txt", "r") as f:
        for line in f:
            seperate_word = line.strip().split(" ")
            num = len(seperate_word)
            for i in range(1, num):
                combine_dict[seperate_word[i]] = seperate_word[0]
    # dict.get with a default collapses the redundant if/else of the original
    # (both branches appended; only the looked-up value differed).
    return [combine_dict.get(word, word) for word in keywords]
class CosineSimilarity(object):
    """Cosine similarity between two text fragments.

    Keywords are extracted from both texts (jieba full-mode cut followed by
    TF-IDF keyword extraction with stop words removed), encoded as term-count
    vectors over the union vocabulary, and compared with sklearn's
    cosine_similarity.
    """
    def __init__(self, content_x1, content_y2):
        # The two raw texts to compare.
        self.s1 = content_x1
        self.s2 = content_y2
    @staticmethod
    def extract_keyword(seq_str):  # extract keywords from one text
        # Strip <style> blocks and HTML tags with a regex.
        re_exp = re.compile(r'(<style>.*?</style>)|(<[^>]+>)', re.S)
        content = re_exp.sub(' ', seq_str)
        # Turn HTML escape entities back into characters.
        content = html.unescape(content)
        # Full-mode segmentation with the user dictionary loaded.
        # NOTE(review): load_userdict is re-run on every call — hoisting it to
        # module level would avoid repeated work; confirm no per-call intent.
        jieba.load_userdict("user_dict.txt")
        seg = [i for i in jieba.cut(content, cut_all=True) if i != '']
        # keywords = [k for k in jieba.cut(content, cut_all=True) if k != ' ' and k != '' and k not in stopword_list]
        # Top-500 TF-IDF keywords of the joined token stream, stop words removed.
        keywords = [k for k in jieba.analyse.extract_tags("|".join(seg), topK=500, withWeight=False) if k != ' ' and k != '' and k not in stopword_list]
        # keywords = replace_tongyici(keywords)
        # keywords = jieba.analyse.extract_tags("|".join(seg), topK=500, withWeight=False, allowPOS=('n', 'nr', 'ns'))
        # keywords = jieba.analyse.extract_tags(content, topK=2000, withWeight=False)
        # NOTE(review): the first element of the returned pair is always an
        # empty list (callers unpack it as `seg` and ignore it); only the
        # keyword list carries information.
        return [],keywords
    @staticmethod
    def one_hot(word_dict, keywords):  # encode keywords over the vocabulary
        # Despite the name this is a term-count vector, not strict one-hot:
        # repeated keywords increment the same slot (see `+= 1` below).
        cut_code = [0]*len(word_dict)
        for word in keywords:
            cut_code[word_dict[word]] += 1
        return cut_code
    def main(self):
        """Return (similarity, keywords_of_s1, keywords_of_s2)."""
        # jieba.analyse.set_stop_words('stop_words.utf8')
        # Extract keywords from both texts (first tuple element is unused).
        seg1,keywords1 = self.extract_keyword(self.s1)
        seg2,keywords2 = self.extract_keyword(self.s2)
        # Vocabulary: union of both keyword sets.
        union = set(keywords1).union(set(keywords2))
        # Assign each vocabulary word a vector index.
        word_dict = {}
        i = 0
        for word in union:
            word_dict[word] = i
            i += 1
        # Count-vector encodings of both keyword lists.
        s1_cut_code = self.one_hot(word_dict, keywords1)
        s2_cut_code = self.one_hot(word_dict, keywords2)
        # vector = TfidfVectorizer(max_df=10, min_df=1)
        # tfidf = vector.fit_transform([" ".join(keywords1), " ".join(keywords2)])
        # Cosine similarity of the two count vectors.
        sample = [s1_cut_code, s2_cut_code]
        # Guard against degenerate input (e.g. an empty vocabulary).
        try:
            sim = cosine_similarity(sample)
            return sim[1][0],keywords1,keywords2
        except Exception as e:
            print(e)
            return 0.0,keywords1,keywords2
# Manual test: compare two fixed proposal texts.
if __name__ == '__main__':
    # File-based variant kept for reference:
    # with open(r'D:\pythonDM\Ndkj\live111\result\1.txt', encoding='UTF-8') as x, open(r'D:\pythonDM\Ndkj\live111\result\2.txt', encoding='UTF-8') as y:
    #     content_x = x.read()
    #     content_y = y.read()
    content_x = """中英文双语版本开发建设,为平台提供国际化能力,对平台APP端所有功能菜单以及所有官方维护内容进行中英翻译,实现中英双语的APP版本,同时提供版本一键切换功能,提升一机游丽水平台服务的全面性,将一机游丽水打造成全国智慧文旅平台领域专业、专注、领先的范本。"""
    content_y = """(1)诉求受理、分流功能: 用户可以对进入统一受理中心的诉求信息进行识别,对有效且需要分流的诉求进行受理、分派操作。操作后,诉求自动进入下一个流程环节,操作后信息状态变为无效信息。对应的诉求状态变化会同步通知诉求来源系统。 (2)诉求结案回复、设为无效功能 用户对进入统一受理中心的诉求信息进行识别,对可以直接答复的信息进行回复并结案的操作,操作后诉求会自动结案。如诉求信息无效,则可以对其信息不受理操作,操作后信息状态变为无效信息。对应的诉求状态变化会同步通知诉求来源系统。 诉求流转跟踪视图用户可在统一受理中心的工作台上看到已分派的系统列表,信息详情中会展示该诉求的处理流程,内部和外部系统的处理过程都可以看到,方便用户掌握诉求的进展以便对诉求流转进行跟踪。 (3)自动分类、分流: 统一受理中心通过大数据分析,对诉求内容的语义解析算法,提取出该诉求的事件分类自动填充到分流信息中,再通过事项清单配置,将负责该类型事件的处理对象系统自动填充到分流信息中。用户只需核对系统填充信息即可实现一键分派。 (4)自动区分无效信息: 统一受理中心通过大数据分析,对诉求内容的语义解析算法,将疑似无效内容的诉求信息标记出来,提供用户判断的依据,提高用户处理业务的效率。"""
    similarity = CosineSimilarity(content_x, content_y)
    # similarity = CosineSimilarity(file, file2)
    # Rebinds `similarity` to the (score, keywords1, keywords2) tuple.
    similarity = similarity.main()
    print(similarity)
@@ -0,0 +1,104 @@ | |||
import os | |||
import docx | |||
import requests | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
def read_docx(file_path):
    """Parse a project-proposal .docx, harvest the "3.1.2 建设内容" section into
    per-module (name -> description) pairs, insert them into
    user_history_module_data, and return the section text joined by newlines.
    """
    mysql = mysql_pool.ConnMysql()
    # Download/convert variant kept for reference:
    # url = "http://jobapi.ningdatech.com/prometheus-yw_file_service/files/20240919/669f323c5c824f89a34bf04a66105902.doc"
    # file_name = "丽水市本级信息化项目建设方案模板.docx"
    # file_path = os.path.join("temp", file_name)
    try:
        # r = requests.get(url)
        # with open(file_path, "wb") as code:
        #     code.write(r.content)
        # convert_doc_to_docx(file_path, file_path.replace('.doc', '.docx'))
        # file_path = file_path.replace('.doc', '.docx')
        doc = docx.Document(file_path)
        # State flags: inside the target section / inside the project-name part.
        is_acquire = 0
        is_project_name = 0
        content = []
        # Module name -> description text.
        feature_map = {}
        # Name of the module currently being filled.
        feature_name = ""
        # Project name, taken from the paragraphs between '项目名称' and '项目类型'.
        xmmc = ""
        for para in doc.paragraphs:
            style = para.style.name
            print(f"style: {para.style.name}, value: {para.text}")
            # NOTE(review): `== 1` only skips styles where 'toc' sits at index
            # 1 — presumably `!= -1` (any TOC style) was intended; confirm.
            if str(style).find('toc') == 1:
                continue
            # Track whether we are inside the project-name region.
            if para.text.find('项目名称') != -1:
                is_project_name = 1
            elif para.text.find('项目类型') != -1:
                is_project_name = 0
            if is_project_name == 1:
                # Non-heading, non-TOC paragraphs in the region are the name.
                if str(style).find('Heading') == -1 and str(style).find('toc') == -1:
                    xmmc = para.text
            # Track whether we are inside the "3.1.2 建设内容" section.
            if para.text == '3.1.2 建设内容':
                is_acquire = 1
            elif para.text == '3.2 整体架构设计':
                is_acquire = 0
            if is_acquire == 1:
                if str(style).find('Heading') == -1:
                    # Body paragraph: store it under the last-seen heading.
                    # NOTE(review): feature_name is reset after the first body
                    # paragraph, so any further paragraphs of the same module
                    # land under the "" key — confirm this is intended.
                    feature_map[feature_name] = para.text
                    # Reset the module name.
                    feature_name = ""
                    content.append(para.text)
                else:
                    # Heading paragraph: starts a new module entry.
                    feature_map[para.text] = ""
                    feature_name = para.text
        # Persist every real module (skip the section title and the "" key).
        for key, value in feature_map.items():
            if key != "3.1.2 建设内容" and key != "":
                print(f"Key: {key}, Value: {value}")
                # Store the module description.
                mysql.sql_change_msg(
                    """insert into user_history_module_data(xmmc,gnmc,gnms,line, remark) value("%s", "%s", "%s", "%s", "%s")""" % (
                        escape_string(xmmc), escape_string(key), escape_string(value), "", "自动拆解导入"))
    finally:
        # os.remove(file_path)
        print("删除文件")
    # NOTE(review): if docx.Document() raises, `content` is unbound here and
    # this line would raise NameError — the original exception propagates
    # first, but the cleanup path deserves a look.
    return "\n".join(content)
def convert_doc_to_docx(doc_file, docx_file):
    """Naive .doc -> .docx conversion; always deletes the source file.

    NOTE(review): this reads the raw binary .doc content and writes it into a
    single paragraph of a fresh .docx — that is not a real format conversion
    and add_paragraph() with bytes will likely fail; verify against a proper
    converter (e.g. LibreOffice headless / win32com).
    """
    try:
        if doc_file.endswith('.doc'):
            # Create a new empty .docx document.
            docx_document = docx.Document()
            # Read the raw bytes of the .doc file.
            with open(doc_file, 'rb') as doc:
                content = doc.read()
            # Dump the bytes into one paragraph of the .docx.
            docx_document.add_paragraph(content)
            # Save the .docx file.
            docx_document.save(docx_file)
    finally:
        # NOTE(review): the source file is removed even when conversion fails.
        os.remove(doc_file)
# file_path = "丽水市本级信息化项目建设方案模板.docx" | |||
# doc_content = read_docx() | |||
# print(doc_content) | |||
@@ -0,0 +1,54 @@ | |||
# coding=utf-8 | |||
from flask import Flask, redirect, url_for, request | |||
from flask import jsonify | |||
import docx_extract | |||
import mysql_pool | |||
import main1 | |||
# import xm | |||
# from xm import xsd | |||
app = Flask(__name__) | |||
# mysql = mysql_pool.ConnMysql() | |||
# Runs the duplicate check and returns the matched idc_project rows as JSON.
@app.route('/check/duplicates/<projectId>')
def success(projectId):
    """Duplicate-check one project (or all projects when projectId == 0).

    Loads (project_id, file_path, project_name) triples from idc_project,
    hands them to main1.project_check, and responds with the raw rows.
    Raises ValueError (-> HTTP 500) on a non-numeric projectId.
    """
    mysql = mysql_pool.ConnMysql()
    pid = int(projectId)  # validate the path segment once
    if pid == 0:
        data = mysql.sql_select_many("""select * from idc_project""")
    else:
        # Security fix: interpolate the validated integer rather than the raw
        # URL string, so the query cannot be injected into.
        data = mysql.sql_select_many("""select * from idc_project where project_id=%d""" % pid)
    print(data)
    data_list = []
    for ap in data:
        # if os.path.exists(ap.get("file_path")):
        data_list.append((ap.get("project_id"), ap.get("file_path"), ap.get("project_name")))
    mysql.release()
    main1.project_check(data_list)
    return jsonify({"code": 0, "data": data})
# Automatically extract module content from an uploaded docx path.
@app.route('/check/docx/save', methods=['POST'])
def docx_save():
    """POST endpoint: run docx extraction on the JSON body's file_path.

    Echoes the request payload back with code 0.
    """
    payload = request.get_json()
    if payload["file_path"] != "":
        docx_extract.read_docx(payload["file_path"])
    return jsonify({"code": 0, "data": payload})
# Reads rows from idc_project (e.g. project_id=11) and processes the file at
# each row's file_path.
if __name__ == '__main__':
    app.run(host="0.0.0.0", port=19099)
    # insert_history_data_total.update_desc()
    print("run server ...")  # NOTE(review): only reached after the server stops
    # app.run(port=19097)
@@ -0,0 +1,181 @@ | |||
import requests | |||
import json | |||
import re | |||
def CallResult(prompt):
    """POST *prompt* to the private fpExtract channel.

    Returns the response's 'data' payload, or the error string
    "查询结果错误" on a non-200 status.
    """
    request_headers = {
        'Content-Type': 'application/json',
        'Cookie': 'MODEL_USER_AUTH=4879b93405ebb89cad144590f0a4873f#3',
    }
    request_body = json.dumps({
        'reqSource': "private",
        'reqParams': {
            'busCode': "fpExtract",
            'degree': "low",
            'fpRequire': prompt,
        },
    })
    # Call the model-serving API.
    response = requests.post("http://81.70.174.229:9000/api/serveChannel",
                             headers=request_headers, data=request_body)
    if response.status_code != 200:
        return "查询结果错误"
    return response.json()['data']
# Call the GLM name-extraction endpoint.
def CallResultNew(prompt):
    """GET the fpNameExtract endpoint for *prompt* and return its 'data'.

    Returns None when *prompt* is empty (implicit in the original, explicit
    here), and "查询结果错误" on a non-200 status.
    """
    if prompt == "":
        return None
    headers = {
        'Content-Type': 'application/json',
        'Cookie': 'MODEL_USER_AUTH=92962ed4181f5221b20faaad1c42b3b8#3',
    }
    url = f'http://81.70.174.229:8090/smarty/fpNameExtract?fpRequire={prompt}&modelType=llm'
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        return "查询结果错误"
    return response.json()['data']
def CallContentResult(prompt):
    """Sentence-wise extraction through the fpExtract channel.

    Splits *prompt* on '。', sends each non-empty sentence, and chains the
    returned "actList|objList" fragments with ' -> ' separators.  Returns the
    accumulated string, or "查询结果错误" on the first non-200 response.
    """
    content = ""
    # Identical for every request, so built once outside the loop.
    request_headers = {
        'Content-Type': 'application/json',
        'Cookie': 'MODEL_USER_AUTH=4879b93405ebb89cad144590f0a4873f#3',
    }
    for sentence in re.split("。", prompt):
        if sentence == '':
            continue
        request_body = json.dumps({
            'reqSource': "private",
            'reqParams': {
                'busCode': "fpExtract",
                'degree': "low",
                'fpRequire': sentence.replace("\n", "").replace(" ", ""),
            },
        })
        # Call the model-serving API for this sentence.
        response = requests.post("http://81.70.174.229:9000/api/serveChannel",
                                 headers=request_headers, data=request_body)
        if response.status_code != 200:
            return "查询结果错误"
        glm_data = response.json()['data']
        print(f'glm_data = {glm_data}')
        content = content + ",".join(glm_data["actList"]) + "|" + ",".join(glm_data["objList"]) + " -> "
        print(content)
    return content
def CallContentResultNew(prompt):
    """Collect extracted feature names for *prompt* from fpNameExtract.

    NOTE(review): although the text is split on '。', every request sends the
    whole *prompt* (not the per-sentence `seq_ele`), and POST is used where
    CallResultNew uses GET on the same endpoint — both look like bugs; confirm
    the intended per-sentence behaviour before changing.
    """
    content = ""
    seqs = re.split("。", prompt)
    for seq_ele in seqs:
        if seq_ele != '':
            headers = {
                'Content-Type': 'application/json',
                'Cookie': 'MODEL_USER_AUTH=92962ed4181f5221b20faaad1c42b3b8#3',
            }
            url = f'http://81.70.174.229:8090/smarty/fpNameExtract?fpRequire={prompt}&modelType=llm'
            response = requests.post(url, headers=headers)
            if response.status_code != 200:
                return "查询结果错误"
            resp = response.json()
            glm_datas = resp['data']
            # Concatenate every extracted name for this batch.
            for glm_data in glm_datas:
                name = glm_data["name"]
                content += name
            # Normalise '<br/>' separators to commas.
            content = content.replace("<br/>", ",")
    return content
def AutoDLResult(prompt):
    """Disabled AutoDL similarity call; always returns (0, "").

    The original implementation (POST the prompt to a local AutoDL endpoint,
    pull the first number out of the response, and return it together with
    the emoji-filtered reply) is currently switched off; *prompt* is unused.
    """
    return 0, ""
def AutoDLResultNoNum(prompt):
    """Disabled AutoDL call (no numeric extraction); always returns "".

    The original HTTP implementation returning the raw model response is
    currently switched off; *prompt* is unused.
    """
    return ""
def qwenResult(sentence1, sentence2):
    """Ask the local Qwen compare service to score two sentences.

    Returns the 'data' field of the JSON response (None if absent).
    """
    compare_url = "http://127.0.0.1:5010/api/v1/compare"
    body = json.dumps({
        'sentence1': sentence1,
        'sentence2': sentence2,
    })
    request_headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    reply = requests.request("POST", compare_url, headers=request_headers, data=body)
    desc = reply.json().get("data")
    print(f"desc : {desc}")
    return desc
# Remove emoji / astral-plane characters from text.
def filter_emoji(desstr, restr=''):
    """Return *desstr* with every character outside the BMP replaced by *restr*."""
    try:
        pattern = re.compile(u'[\U00010000-\U0010ffff]')
    except re.error:
        # Narrow (UCS-2) builds store astral characters as surrogate pairs.
        pattern = re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]')
    return pattern.sub(restr, desstr)
if __name__ == '__main__':
    # Scratch entry point.  Earlier experiments (emoji filtering, a long A/B
    # duplicate-rate prompt through AutoDLResultNoNum, a qwenResult call) are
    # disabled; only the slicing check below runs.
    string = "相似度等级:高 原因:这两段话都涉及到信息"
    # Index 6 is the grade character immediately after '相似度等级:'
    # ("高" in this sample).
    print(string[6:7])
@@ -0,0 +1,410 @@ | |||
# coding=utf-8 | |||
import re | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import cosin_similarity | |||
import pandas as pd | |||
import glm_utils | |||
import os | |||
import json | |||
# Dimension name (Chinese, exactly as it appears in the audit Excel) ->
# internal column code used by the user_history_* tables.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys",
    "申报单位": "sbdw",
    "所属地区": "ssdq",
    "预算年度": "ysnd"
}
# Reverse lookup of wdys1: internal column code -> Chinese dimension name.
wdys2 = {
    "xmmc": "项目名称",
    "xzwt": "现状问题",
    "xtjc": "系统基础",
    "xmmb": "项目目标",
    "yqjx": "预期绩效",
    "jsxq": "建设需求",
    "sjxq": "数据需求",
    "aqxq": "安全需求",
    "ywly": "业务领域",
    "hxyw": "核心业务",
    "ywxq": "业务需求",
    "ywxt": "业务协同",
    "jscj": "建设层级",
    "yhfw": "用户范围",
    "mbqt": "目标群体",
    "jsnr": "建设内容",
    "gnmk": "功能模块",
    "sjgx": "数据共享",
    "znys": "智能要素",
    "sbdw": "申报单位",
    "ssdq": "所属地区",
    "ysnd": "预算年度"
}
# Column codes of the per-module (功能模块) sheet section.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述"
}
def getFlag():
    """Read per-dimension weight strings from the audit Excel sheet.

    Returns a dict mapping the internal dimension code (values of wdys1) to
    the weight text captured in full-width parentheses, e.g. {"xmmc": "(10%)"}.
    Rows that do not match the pattern or name an unknown dimension are skipped.
    """
    data_dict = {}
    df = pd.read_excel("0825-丽水系统查重维度.xlsx")
    data = df.values
    # Second column with NaN rows dropped: one "<dimension>(<weight>%)" per row.
    data = list(pd.Series(data[:, 1]).dropna())
    for d in data:
        try:
            wd = re.search("(.*?)(.*?%)", d).group(1).strip()
            wdc = wdys1.get(wd)
            if wdc:
                qz = re.search(".*?((.*?%))", d).group(1)
                data_dict[wdc] = qz
        except (AttributeError, TypeError):
            # Bug fix: narrowed from a bare `except:`.  AttributeError covers
            # re.search() returning None (no match); TypeError covers
            # non-string cells.  Anything else now surfaces instead of being
            # silently swallowed.
            pass
    return data_dict
# getFlag() | |||
def gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title, line):
    """Persist per-module descriptions and their keywords for project *xmmc*.

    *data* rows: column 1 holds the module name, column 3 a description
    fragment; every fragment of one module (names listed in *er_title*) is
    concatenated before storage.  *dl* is unused here but kept for interface
    compatibility with project_check().
    """
    # Concatenate all third-level description fragments per module name.
    str_dict = {}
    for et in er_title:
        for d in data:
            if d[1] == et:
                if str_dict.get(et):
                    str_dict[et] = str_dict.get(et) + d[3]
                else:
                    str_dict[et] = d[3]
    for k, v in str_dict.items():
        mysql.sql_change_msg("""insert into user_history_module_data(xmmc,gnmc,gnms,line, remark) value("%s", "%s", "%s", "%s", "%s")""" % (
            escape_string(xmmc), escape_string(k), escape_string(v), line, ""))
        # NOTE(review): the description is compared against itself, so the
        # similarity value is discarded — only keywords_y is used below.
        similarity = cosin_similarity.CosineSimilarity(v, v)
        similarity, keywords_x, keywords_y = similarity.main()
        # Consistency fix: xmmc is now escaped here exactly as in the insert
        # above (it was previously interpolated raw into this statement).
        mysql.sql_change_msg("""insert into user_history_module_keywords (xmmc,gnmc,gnms,line) value("%s" ,"%s", "%s", "%s")""" % (
            escape_string(xmmc), escape_string(k), str(keywords_y)[1:-1], line))
def project_check(data_list, line):
    """Ingest one audit Excel per entry of *data_list*.

    Each entry is (project_id, excel_path, project_name).  For every file the
    dimension texts are concatenated, stored in user_history_data, their
    extracted keywords stored in user_history_keywords, and the per-module
    breakdown delegated to gong_neng_mo_kuai().  *line* tags every row.
    """
    mysql = mysql_pool.ConnMysql()
    # Weight table currently unused:
    # get_data_dict = getFlag()
    for dl in data_list:
        print(dl)
        df = pd.read_excel(dl[1])
        # The project name lives in the header of the second column.
        xmmc = df.keys()
        xmmc = xmmc[1]
        # Strip feasibility-report suffixes from the project name.
        # Bug fix: the original `if "可研报告" or "可研性报告" or ... in xmmc`
        # was always truthy (non-empty literals).  Since str.replace() is a
        # no-op when the substring is absent, replacing unconditionally
        # preserves the original behaviour exactly while removing the
        # misleading condition.
        for suffix in ('可研报告', '可研性报告', '可行性研究报告'):
            xmmc = xmmc.replace(suffix, '')
        data = df.values
        # Concatenate the cell texts of every dimension row; rows with an
        # empty first cell continue the previous dimension.
        join_str = ""
        str_dict = {}
        title = ""
        er_title = set()   # module names seen in the 功能模块 section
        gnmk_str = []      # flat list of module cells for the gnmk column
        for d in data:
            if pd.notnull(d[0]):
                # New dimension starts: remember its title, reset the buffer.
                title = d[0]
                if title == "功能模块":
                    er_title.add(d[1])
                join_str = ""
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                        if title == "功能模块":
                            # Skip the literal column header cell.
                            if i == '功能描述':
                                continue
                            else:
                                gnmk_str.append(i)
                str_dict[wdys1.get(title)] = join_str
            else:
                # Continuation row of the current dimension.
                if title == "功能模块":
                    er_title.add(d[1])
                for i in d[3:]:
                    if pd.notnull(i):
                        join_str += str(i)
                        if title == "功能模块":
                            gnmk_str.append(i)
                str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
        gnmk = ",".join(gnmk_str)
        str_dict['gnmk'] = gnmk
        # Store the raw dimension texts for this project.
        mysql.sql_change_msg(
            """insert into user_history_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys,sbdw,ssdq,ysnd,line,remark) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s","%s","%s","%s","%s","%s")"""
            % (escape_string(xmmc),
               escape_string(str_dict.get("xzwt")) if str_dict.get("xzwt") else None,
               escape_string(str_dict.get("xtjc")) if str_dict.get("xtjc") else None,
               escape_string(str_dict.get("xmmb")) if str_dict.get("xmmb") else None,
               escape_string(str_dict.get("yqjx")) if str_dict.get("yqjx") else None,
               escape_string(str_dict.get("jsxq")) if str_dict.get("jsxq") else None,
               escape_string(str_dict.get("sjxq")) if str_dict.get("sjxq") else None,
               escape_string(str_dict.get("aqxq")) if str_dict.get("aqxq") else None,
               escape_string(str_dict.get("ywly")) if str_dict.get("ywly") else None,
               escape_string(str_dict.get("hxyw")) if str_dict.get("hxyw") else None,
               escape_string(str_dict.get("ywxq")) if str_dict.get("ywxq") else None,
               escape_string(str_dict.get("ywxt")) if str_dict.get("ywxt") else None,
               escape_string(str_dict.get("jscj")) if str_dict.get("jscj") else None,
               escape_string(str_dict.get("yhfw")) if str_dict.get("yhfw") else None,
               escape_string(str_dict.get("mbqt")) if str_dict.get("mbqt") else None,
               escape_string(str_dict.get("jsnr")) if str_dict.get("jsnr") else None,
               escape_string(str_dict.get("gnmk")) if str_dict.get("gnmk") else None,
               escape_string(str_dict.get("sjgx")) if str_dict.get("sjgx") else None,
               escape_string(str_dict.get("znys")) if str_dict.get("znys") else None,
               escape_string(str_dict.get("sbdw")) if str_dict.get("sbdw") else None,
               escape_string(str_dict.get("ssdq")) if str_dict.get("ssdq") else None,
               escape_string(str_dict.get("ysnd")) if str_dict.get("ysnd") else None,
               line, ""))
        # Extract keywords per dimension (the self-comparison only serves to
        # produce the keyword list; the similarity score is discarded).
        project_gjc = {}
        for w in wdys2.keys():
            content_x = str_dict.get(w)
            content_y = str_dict.get(w)
            if content_x and content_y:
                similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                similarity, keywords_x, keywords_y = similarity.main()
                project_gjc[w] = keywords_y
        mysql.sql_change_msg(
            """insert into user_history_keywords (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys, line) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % (xmmc, str(project_gjc.get("xzwt"))[1:-1] if project_gjc.get("xzwt") else None,
               str(project_gjc.get("xtjc"))[1:-1] if project_gjc.get("xtjc") else None,
               str(project_gjc.get("xmmb"))[1:-1] if project_gjc.get("xmmb") else None,
               str(project_gjc.get("yqjx"))[1:-1] if project_gjc.get("yqjx") else None,
               str(project_gjc.get("jsxq"))[1:-1] if project_gjc.get("jsxq") else None,
               str(project_gjc.get("sjxq"))[1:-1] if project_gjc.get("sjxq") else None,
               str(project_gjc.get("aqxq"))[1:-1] if project_gjc.get("aqxq") else None,
               str(project_gjc.get("ywly"))[1:-1] if project_gjc.get("ywly") else None,
               str(project_gjc.get("hxyw"))[1:-1] if project_gjc.get("hxyw") else None,
               str(project_gjc.get("ywxq"))[1:-1] if project_gjc.get("ywxq") else None,
               str(project_gjc.get("ywxt"))[1:-1] if project_gjc.get("ywxt") else None,
               str(project_gjc.get("jscj"))[1:-1] if project_gjc.get("jscj") else None,
               str(project_gjc.get("yhfw"))[1:-1] if project_gjc.get("yhfw") else None,
               str(project_gjc.get("mbqt"))[1:-1] if project_gjc.get("mbqt") else None,
               str(project_gjc.get("jsnr"))[1:-1] if project_gjc.get("jsnr") else None,
               str(project_gjc.get("gnmk"))[1:-1] if project_gjc.get("gnmk") else None,
               str(project_gjc.get("sjgx"))[1:-1] if project_gjc.get("sjgx") else None,
               str(project_gjc.get("znys"))[1:-1] if project_gjc.get("znys") else None,
               line))
        gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title, line)
def update_desc():
    """Regenerate the GLM summary (glm_desc) for every module of one fixed project.

    Reads each row of user_history_module_data belonging to the hard-coded
    project name, asks the GLM service to summarise the module description
    (gnms), and writes the result back into the row's glm_desc column.
    """
    db = mysql_pool.ConnMysql()
    rows = db.sql_select_many("""select id, gnms from user_history_module_data where xmmc = '丽水市城市管理指挥中心信息系统(一期)项目'""")
    for row in rows:
        # Summarise the module description through the GLM endpoint.
        summary = glm_utils.CallContentResultNew(row.get("gnms"))
        db.sql_change_msg(
            """UPDATE user_history_module_data SET glm_desc = "%s" WHERE id = %d""" % (
                summary if summary else None,
                row.get("id")))
        print(summary)
def update_desc1():
    """Regenerate the GLM summary (glm_desc) for every module of one fixed project
    stored in the gnms_gml table.

    Same flow as update_desc, but reads from and writes to gnms_gml instead of
    user_history_module_data.
    """
    db = mysql_pool.ConnMysql()
    rows = db.sql_select_many("""select id, gnms from gnms_gml where xmmc = '丽水花园云(城市大脑)数字驾驶舱项目'""")
    for row in rows:
        # Summarise the module description through the GLM endpoint.
        summary = glm_utils.CallContentResultNew(row.get("gnms"))
        db.sql_change_msg(
            """UPDATE gnms_gml SET glm_desc = "%s" WHERE id = %d""" % (
                summary if summary else None,
                row.get("id")))
        print(summary)
def info_word_project():
    """Build a fine-tuning corpus comparing construction-content (jsnr) segments.

    Splits the jsnr text of one target project and of a reference project set on
    the '-----》' delimiter, then writes one JSON object per cross pair
    (instruction prompt, "A:...\\nB:..." input, empty output) to
    其他-建设内容.json, one object per line.

    Fixes over the original: the local that shadowed the builtin ``str`` is
    renamed, and the output file is opened with an explicit UTF-8 encoding so
    the Chinese text round-trips on any platform.
    """
    mysql = mysql_pool.ConnMysql()
    module_list1 = mysql.sql_select_many(
        """select jsnr from user_history_data where xmmc = '2023年丽水市云和县数字法治门户建设项目' """)
    module_list2 = mysql.sql_select_many(
        """select jsnr from user_history_data where xmmc IN ('浙江省第二监狱重点罪犯管控模型项目',
        '浙江省农村水电站管理数字化应用',
        '浙江省河湖库保护数字化应用建设项目',
        '浙江省环境遥感监测业务智治',
        '平台项目',
        '浙江林业智媒平台项目',
        '未来e家应用建设方案',
        '浙江省智慧林业云平台升级改造项目建设方案',
        '为侨服务“全球通”平台二期建设项目')""")
    json_objects = []
    for module_info1 in module_list1:
        for jsnr1Ele in module_info1["jsnr"].split('-----》'):
            for module_info2 in module_list2:
                for jsnr2Ele in module_info2["jsnr"].split('-----》'):
                    pair_text = "A:%s\nB:%s" % (jsnr1Ele, jsnr2Ele)
                    data = {
                        "instruction": "现在你是一个政府提案的查重检查人员,给定两段话A和B:让我们一步步思考并判断是否相似。请以相似度从高、中、低三个等级进行评价,并给出理由。",
                        "input": pair_text,
                        "output": ""
                    }
                    json_objects.append(data)
    with open('其他-建设内容.json', 'w', encoding='utf-8') as f:
        for json_obj in json_objects:
            json_str = json.dumps(json_obj, ensure_ascii=False)  # serialize one training sample
            f.write(json_str + '\n')  # one JSON object per line (jsonl style)
def info_word1():
    """Build a fine-tuning corpus comparing function descriptions (gnms).

    Crosses every module description of one target project with every module
    description of a reference project set and writes one JSON object per pair
    (instruction prompt, "A:...\\nB:..." input, empty output) to
    其他-功能模块对比.json, one object per line.

    Fixes over the original: the local that shadowed the builtin ``str`` is
    renamed, and the output file is opened with an explicit UTF-8 encoding so
    the Chinese text round-trips on any platform.
    """
    mysql = mysql_pool.ConnMysql()
    # module_list1 = mysql.sql_select_many("""select gnms from user_history_module_data where xmmc = '莲智社区' """)
    # module_list2 = mysql.sql_select_many("""select gnms from user_history_module_data where xmmc IN ('古堰画乡智慧客厅项目—未来社区智慧服务平台', '未来e家')""")
    module_list1 = mysql.sql_select_many("""select gnms from user_history_module_data where xmmc = '丽水市遂昌县政法委数字法治综合应用' """)
    module_list2 = mysql.sql_select_many("""select gnms from user_history_module_data where xmmc IN ('浙江省第二监狱重点罪犯管控模型项目',
    '浙江省农村水电站管理数字化应用',
    '浙江省河湖库保护数字化应用建设项目',
    '浙江省环境遥感监测业务智治',
    '平台项目',
    '浙江林业智媒平台项目',
    '未来e家应用建设方案',
    '浙江省智慧林业云平台升级改造项目建设方案',
    '为侨服务“全球通”平台二期建设项目')""")
    json_objects = []
    for module_info1 in module_list1:
        for module_info2 in module_list2:
            pair_text = "A:%s\nB:%s" % (module_info1["gnms"], module_info2["gnms"])
            data = {
                "instruction": "现在你是一个政府提案的查重检查人员,给定两段话A和B:让我们一步步思考并判断是否相似。请以相似度从高、中、低三个等级进行评价,并给出理由。",
                "input": pair_text,
                "output": ""
            }
            json_objects.append(data)
    with open('其他-功能模块对比.json', 'w', encoding='utf-8') as f:
        for json_obj in json_objects:
            json_str = json.dumps(json_obj, ensure_ascii=False)  # serialize one training sample
            f.write(json_str + '\n')  # one JSON object per line (jsonl style)
def info_word_project_yw():
    """Build a fine-tuning corpus comparing construction-content (jsnr) segments
    of one target project against a small curated set of related projects.

    Same pairing and output format as info_word_project (splits jsnr on the
    '-----》' delimiter, writes jsonl to 其他-建设内容.json).

    Fixes over the original: the local that shadowed the builtin ``str`` is
    renamed, and the output file is opened with an explicit UTF-8 encoding so
    the Chinese text round-trips on any platform.
    """
    mysql = mysql_pool.ConnMysql()
    module_list1 = mysql.sql_select_many(
        """select jsnr from user_history_data where xmmc = '2023年丽水市云和县数字法治门户建设项目' """)
    module_list2 = mysql.sql_select_many(
        """select jsnr from user_history_data where xmmc IN ('2023年丽水市云和县数字法治门户建设项目', '浙江省司法厅全域数字法治监督应用系统(一期)', '丽水市遂昌县政法委数字法治综合应用', '丽水市龙泉市政法委法治龙泉门户', '庆元县数字法治综合门户')""")
    json_objects = []
    for module_info1 in module_list1:
        for jsnr1Ele in module_info1["jsnr"].split('-----》'):
            for module_info2 in module_list2:
                for jsnr2Ele in module_info2["jsnr"].split('-----》'):
                    pair_text = "A:%s\nB:%s" % (jsnr1Ele, jsnr2Ele)
                    data = {
                        "instruction": "现在你是一个政府提案的查重检查人员,给定两段话A和B:让我们一步步思考并判断是否相似。请以相似度从高、中、低三个等级进行评价,并给出理由。",
                        "input": pair_text,
                        "output": ""
                    }
                    json_objects.append(data)
    with open('其他-建设内容.json', 'w', encoding='utf-8') as f:
        for json_obj in json_objects:
            json_str = json.dumps(json_obj, ensure_ascii=False)  # serialize one training sample
            f.write(json_str + '\n')  # one JSON object per line (jsonl style)
if __name__ == "__main__":
    # Entry point: build the fine-tuning corpus for the hard-coded project
    # pair, then run duplicate checking over every spreadsheet in the folder.
    info_word1()
    print("ok.......")
    # NOTE(review): `os` is not among the imports visible at the top of this
    # file, and `project_check` is called here with two arguments while other
    # copies in this file take one — confirm this entry point actually runs.
    path = r"/Users/kebobo/Downloads/丽水/未来社区"
    data_list = os.listdir(path)
    for file in data_list:
        if file != '.DS_Store':  # skip macOS folder metadata
            # Rebinding data_list inside the loop does not disturb the
            # iteration, which continues over the original listdir() result.
            # (0, path, "") — placeholder project id and name for ad-hoc runs.
            data_list = [(0, path + '/' + file, "")]
            project_check(data_list, "2024-07-27-数字法治")
            print("已存入************************************* %s" % file)
""" | |||
建设目标,业务功能 | |||
gnmk_str = [] | |||
for d in data: | |||
if pd.notnull(d[0]): | |||
title = d[0] | |||
if title == "功能模块": | |||
er_title.add(d[dup_file_test]) | |||
join_str = "" | |||
for i in d[dup_file_test:]: | |||
if pd.notnull(i): | |||
join_str += i | |||
if title == "功能模块": | |||
gnmk_str.append(i) | |||
str_dict[wdys1.get(title)] = join_str | |||
else: | |||
if title == "功能模块": | |||
er_title.add(d[dup_file_test]) | |||
for i in d[dup_file_test:]: | |||
if pd.notnull(i): | |||
join_str += i | |||
if title == "功能模块": | |||
gnmk_str.append(i) | |||
str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str | |||
gnmk = "".join(gnmk_str) | |||
""" | |||
@@ -0,0 +1,511 @@ | |||
# coding=utf-8 | |||
import sys | |||
import re | |||
import baidu | |||
import model_scope | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import cosin_similarity | |||
import pandas as pd | |||
import datetime | |||
import requests | |||
import glm_utils | |||
from threading import Thread | |||
# Excel dimension title (Chinese, as it appears in the upload) -> short
# database column code used across the user_data / user_history_data tables.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys"
}
# Reverse of wdys1: database column code -> Chinese dimension title.
wdys2 = {
    "xmmc": "项目名称",
    "xzwt": "现状问题",
    "xtjc": "系统基础",
    "xmmb": "项目目标",
    "yqjx": "预期绩效",
    "jsxq": "建设需求",
    "sjxq": "数据需求",
    "aqxq": "安全需求",
    "ywly": "业务领域",
    "hxyw": "核心业务",
    "ywxq": "业务需求",
    "ywxt": "业务协同",
    "jscj": "建设层级",
    "yhfw": "用户范围",
    "mbqt": "目标群体",
    "jsnr": "建设内容",
    "gnmk": "功能模块",
    "sjgx": "数据共享",
    "znys": "智能要素"
}
# Function-module field codes: gnmc = function name, gnms = function description.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述"
}
def getFlag():
    """Read per-dimension weight strings from column 1 of 0825.xlsx.

    Each non-null cell is expected to look like "<dimension>(NN%)" (full-width
    parentheses); the dimension title is mapped to its column code via wdys1
    and the "(NN%)" part is kept verbatim.

    Returns:
        dict: column code -> weight string such as "(30%)"; rows that do
        not match the pattern (or are not strings) are skipped.
    """
    data_dict = {}
    df = pd.read_excel("0825.xlsx")
    data = df.values
    data = list(pd.Series(data[:, 1]).dropna())
    for d in data:
        try:
            wd = re.search("(.*?)(.*?%)", d).group(1).strip()
            wdc = wdys1.get(wd)
            if wdc:
                qz = re.search(".*?((.*?%))", d).group(1)
                data_dict[wdc] = qz
        except (AttributeError, TypeError):
            # AttributeError: re.search found no match (None.group);
            # TypeError: cell is not a string. Anything else should surface
            # instead of being silently swallowed by a bare except.
            pass
    return data_dict
def gong_neng_mo_kuai(mysql, dl, data, er_title, str_dict_new):
    """Store the uploaded project's function modules and link each one to its
    best-matching module in the history library.

    Args:
        mysql: ConnMysql wrapper used for all reads/writes.
        dl: (project_id, excel_path, project_name) of the uploaded project.
        data: raw excel rows (DataFrame.values) of the uploaded project.
        er_title: set of second-level module titles found in the excel.
        str_dict_new: dimension dict of the uploaded project; not used in this
            body — kept for interface compatibility with callers.
    """
    nlp = model_scope.Bert_nlp("corom")
    # Concatenate every third-level description cell (column 3) per module
    # title (column 1).
    str_dict = {}
    for et in er_title:
        for d in data:
            if d[1] == et:
                if str_dict.get(et):
                    str_dict[et] = str_dict.get(et) + d[3]
                else:
                    str_dict[et] = d[3]
    for k, v in str_dict.items():
        mysql.sql_change_msg(
            """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, "%s", "%s", "%s", "%s", "模块")""" % (
                int(dl[0]), k, v, str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    module_id_list = mysql.sql_select_many(
        """select project_module_id, module_name, module_content from idc_project_module where project_id=%d""" % dl[
            0])
    data_list = []
    for mil in module_id_list:
        data_list.append({
            "project_module_id": mil.get("project_module_id"),
            "gnmc": mil.get("module_name"),
            "gnms": mil.get("module_content"),
        })
    # History modules to compare against (one function name excluded). The
    # query is loop-invariant, so fetch it once instead of once per module.
    gnmk_copy1 = mysql.sql_select_many("""select * from user_history_module_data WHERE gnmc not in ('专项考评管理应用')""")
    for i in data_list:
        if gnmk_copy1:
            # Exclude modules belonging to the project under check, keeping the
            # row list and the description list aligned. (The original indexed
            # gnmk_copy1 with an index valid only for the *filtered* description
            # list, which pointed at the wrong row whenever anything was
            # filtered out.)
            candidates = [gc for gc in gnmk_copy1 if gc.get("xmmc") != dl[2]]
            desc_info_list = [gc.get("gnms") for gc in candidates]
            # Best match among all history descriptions; idx == -1 means none.
            similarity, s1, s2, idx = nlp.main(i.get("gnms"), desc_info_list)
            if idx == -1:
                continue
            best = candidates[idx]
            mysql.sql_change_msg(
                """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, "%s", "%s", "%s", "%s", "%s")"""
                % (
                    i.get("project_module_id"), escape_string(best.get("gnmc")), escape_string(best.get("xmmc")), "",
                    str(datetime.datetime.now())[:-7],
                    str(datetime.datetime.now())[:-7]))
            dup_module_id = mysql.cur.lastrowid
            check_module_info(mysql, best, dl, i, dup_module_id, similarity)
def check_module_info(mysql, gc, dl, pro, dup_module_id, score):
    """Persist detail rows comparing one uploaded module (pro) against its
    best-matching history module (gc), then store the combined score.

    For the module name (gnmc) the GLM service returns a numeric similarity
    plus an explanation; the number is divided by 100 before accumulating.
    For the module description (gnms) the embedding similarity computed by the
    caller (*score*) is reused and the GLM only supplies the explanation text.
    """
    total_similarity1 = 0
    total_similarity2 = 0
    for j in ["gnmc", "gnms"]:
        # Compare each field of the module pair.
        content_x = gc.get(j)
        content_y = pro.get(j)
        if content_x and content_y:
            if j == "gnmc":
                # Ask the GLM for a similarity score and an overlap analysis.
                similarity, check_desc = glm_utils.AutoDLResult(f"""请帮我分析以下两段重复语句重复的地方: \n第一段话是:'{content_y}', \n ----------------- \n 第二段话是:'{content_x}'""")
                # GLM may fail to return a number; treat that as 0 similarity.
                if similarity is None:
                    similarity = 0
                print(f"similarity is {similarity}")
                # Accumulate; GLM returns a percentage, hence /100.
                total_similarity1 += similarity/100
                mysql.sql_change_msg(
                    """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time, check_desc) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s", "%s")"""
                    % (dup_module_id, dl[2], escape_string(content_y), escape_string(content_x), similarity,
                       "功能名称",
                       str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7], escape_string(check_desc)))
            else:
                # Description field: explanation only; the numeric score is the
                # embedding similarity passed in by the caller.
                check_desc = glm_utils.AutoDLResultNoNum(f"""请帮我分析以下两段重复语句重复的地方: \n第一段话是:'{content_y}', \n ----------------- \n 第二段话是:'{content_x}'""")
                similarity = score
                # Accumulate the description (gnms) similarity.
                total_similarity2 += similarity
                module_content = pro.get("gnms")
                dup_module_content = gc.get("gnms")
                mysql.sql_change_msg(
                    """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time, check_desc) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s", "%s")"""
                    % (dup_module_id, dl[2], escape_string(module_content), escape_string(dup_module_content),
                       similarity,
                       "功能模块描述",
                       str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7],
                       escape_string(check_desc)))
    mysql.sql_change_msg("""update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
        total_similarity1 + total_similarity2, dup_module_id))
def project_check(data_list):
    """End-to-end duplicate check for a batch of uploaded project excels.

    For each (project_id, excel_path, project_name) entry: parse the excel into
    a dimension dict, insert it into user_data, compare it against every
    history project via check_project_info, update the project's status row,
    and finally run the per-module check (gong_neng_mo_kuai).
    """
    mysql = mysql_pool.ConnMysql()
    # mysql.sql_select_many("""select * from mkgjc""")
    # Sizes of the history corpora, stored on the project status row below.
    xmnr_count = len(mysql.sql_select_many("""select * from user_history_data"""))
    gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data"""))
    nlp = model_scope.Bert_nlp("corom")
    # Iterate over every excel path in the batch.
    for dl in data_list:
        # path = "0825-丽水系统查重维度1.xlsx"
        # Load the spreadsheet for this project.
        print(dl,dl[1])
        df = pd.read_excel(dl[1])
        data = df.values
        # Concatenate the cell texts of every dimension found in the excel.
        join_str = ""
        str_dict = {}
        gnmk_str = []
        title = ""
        er_title = set()
        for d in data:
            # if pd.notnull(d[0]):
            #     title = d[0]
            #     if title == "功能模块":
            #         er_title.add(d[1])
            #     join_str = ""
            #     for i in d[1:]:
            #         if pd.notnull(i):
            #             join_str += str(i)
            #     str_dict[wdys1.get(title)] = join_str
            if pd.notnull(d[0]):
                # Column 0 starts a new dimension; reset the accumulator.
                title = d[0]
                if title == "功能模块":
                    er_title.add(d[1])
                join_str = ""
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                        if title == "功能模块":
                            if i == '功能描述':
                                continue
                            else:
                                gnmk_str.append(i)
                str_dict[wdys1.get(title)] = join_str
            else:
                # Continuation row: append to the current dimension's text.
                if title == "功能模块":
                    er_title.add(d[1])
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
        # print(str_dict)
        gnmk = ",".join(gnmk_str)
        str_dict['gnmk'] = gnmk
        mysql.sql_change_msg(
            """insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % (dl[0], str_dict.get("xzwt") if str_dict.get("xzwt") else None,
               str_dict.get("xtjc") if str_dict.get("xtjc") else None,
               str_dict.get("xmmb") if str_dict.get("xmmb") else None,
               str_dict.get("yqjx") if str_dict.get("yqjx") else None,
               str_dict.get("jsxq") if str_dict.get("jsxq") else None,
               str_dict.get("sjxq") if str_dict.get("sjxq") else None,
               str_dict.get("aqxq") if str_dict.get("aqxq") else None,
               str_dict.get("ywly") if str_dict.get("ywly") else None,
               str_dict.get("hxyw") if str_dict.get("hxyw") else None,
               str_dict.get("ywxq") if str_dict.get("ywxq") else None,
               str_dict.get("ywxt") if str_dict.get("ywxt") else None,
               str_dict.get("jscj") if str_dict.get("jscj") else None,
               str_dict.get("yhfw") if str_dict.get("yhfw") else None,
               str_dict.get("mbqt") if str_dict.get("mbqt") else None,
               str_dict.get("jsnr") if str_dict.get("jsnr") else None,
               str_dict.get("gnmk") if str_dict.get("gnmk") else None,
               str_dict.get("sjgx") if str_dict.get("sjgx") else None,
               str_dict.get("znys") if str_dict.get("znys") else None))
        # Fetch every history project to compare against.
        xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data """)
        # Compare the uploaded project with each history project.
        if xmnr_copy1:
            # threads = [Thread(target=check_project_info, args=(mysql, dl, xc, str_dict)) for xc in xmnr_copy1]
            # for t in threads:
            #     t.start()
            #
            # for t in threads:
            #     t.join()
            # pro_ths = []
            # for xc in xmnr_copy1:
            #     # check_project_info(mysql, dl, xc, str_dict)
            #     p = Thread(target=check_project_info, args=(mysql, dl, xc, str_dict))
            #     pro_ths.append(p)
            #     p.start()
            # for p in pro_ths:
            #     p.join()
            xmnr_copy1_new = []
            for xc in xmnr_copy1:
                # Never compare a project against itself.
                if xc["xmmc"] == str_dict.get("xmmc"):
                    continue
                check_project_info(mysql, dl, xc, str_dict, nlp)
            # NOTE(review): originally intended to pick the top match for GLM
            # analysis here — confirm whether that step was dropped on purpose.
        mysql.sql_change_msg(
            """update idc_project set dup_status=3, one_vote_veto_status=1, self_check_status=1, history_project_count=%d ,module_count=%d where project_id=%d""" % (
                xmnr_count, gnmk_count, dl[0]))
        gong_neng_mo_kuai(mysql, dl, data, er_title, str_dict)
def check_project_info(mysql, dl, xc, str_dict, nlp):
    """Compare the uploaded project (str_dict) with one history project (xc)
    dimension by dimension and persist the weighted similarity scores.

    Args:
        mysql: ConnMysql wrapper used for all inserts/updates.
        dl: (project_id, excel_path, project_name) of the uploaded project.
        xc: one row of user_history_data; the first key is the project name,
            the remaining keys are dimension codes (see wdys2).
        str_dict: dimension code -> concatenated text from the uploaded excel.
        nlp: model_scope.Bert_nlp instance; nlp.main(x, [y]) returns
            (similarity, s1, s2, idx).

    Weighting (percent multipliers applied to the raw similarity):
      * history side has 建设内容 (jsnr) but no 功能模块 (gnmk):
        gnmk 0, jsnr 40, remaining dimensions share 60 equally.
      * history side has gnmk but no jsnr:
        gnmk 50, jsnr 0, remaining dimensions share 50 equally.
      * otherwise: gnmk 50, jsnr 40, remaining dimensions share 10.

    This is a behavior-preserving rewrite of three near-identical branches;
    it also drops leftover debug prints and the unused total_keywords local.
    """
    total_similarity = 0
    # One header row per (uploaded project, history project) pair.
    mysql.sql_change_msg(
        """insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
        % (dl[0], escape_string(xc.get("xmmc")), escape_string(dl[1]), "", "", "需求相似、业务相似", "历史项目", "",
           str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    dup_id = mysql.cur.lastrowid
    # Count the ordinary dimensions (everything but gnmk/jsnr) present on both
    # sides; they split a fixed weight budget equally.
    dup_count = 0
    for dim in list(xc.keys())[1:]:
        if dim == 'gnmk' or dim == 'jsnr':
            continue
        if xc.get(dim) and str_dict.get(dim):
            dup_count += 1
    w_gnmk, w_jsnr, w_other = _dimension_weights(xc, str_dict, dup_count)
    for dim in list(xc.keys())[1:]:
        content_x = xc.get(dim)
        content_y = str_dict.get(dim)
        if not (content_x and content_y):
            continue
        similarity, _c1, _c2, _idx = nlp.main(content_x, [content_y])
        if dim == 'gnmk':
            similarity = similarity * w_gnmk
        elif dim == 'jsnr':
            similarity = similarity * w_jsnr
        else:
            similarity = similarity * w_other
        total_similarity += similarity
        # One detail row per compared dimension.
        mysql.sql_change_msg(
            """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
            % (dup_id, wdys2.get(dim), similarity, escape_string(content_y),
               escape_string(content_x), str(datetime.datetime.now())[:-7],
               str(datetime.datetime.now())[:-7]))
    mysql.sql_change_msg(
        """update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id))


def _dimension_weights(xc, str_dict, dup_count):
    """Return (gnmk, jsnr, other) weight multipliers for one comparison.

    Mirrors the original three-branch logic exactly: a value counts as blank on
    the history side when it is None, the literal string 'None', or whitespace;
    on the uploaded side when it is None or whitespace.
    """
    def hist_blank(v):
        return v == 'None' or v is None or v.strip() == ''

    def new_blank(v):
        return v is None or v.strip() == ''

    def share(budget):
        # w_other is only ever applied when at least one ordinary dimension
        # exists (i.e. dup_count >= 1), so returning 0 here is never used —
        # it just avoids a ZeroDivisionError on degenerate input.
        return budget / dup_count if dup_count else 0

    if (hist_blank(xc['gnmk']) and new_blank(str_dict['gnmk'])
            and xc['jsnr'] is not None and xc['jsnr'] != 'None'
            and str_dict['jsnr'] is not None and len(str_dict['jsnr'].strip()) > 0):
        # Only 建设内容 is usable: it carries 40, the rest share 60.
        return 0, 40, share(60)
    if (hist_blank(xc['jsnr']) and new_blank(str_dict['jsnr'])
            and xc['gnmk'] is not None and xc['gnmk'] != 'None'
            and str_dict['gnmk'] is not None and len(str_dict['gnmk'].strip()) > 0):
        # Only 功能模块 is usable: it carries 50, the rest share 50.
        return 50, 0, share(50)
    # Default: both contribute; ordinary dimensions share the remaining 10.
    return 50, 40, share(10)
if __name__ == "__main__":
    # Fetch the batch of (project_id, file_path, project_name) entries for
    # job 599 from the local duplicate-check service.
    all_path = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 599).json()
    # print(all_path)
    # dict1 = {k:v for k, v in sorted(dict.items(), key= lambda item : item[1])}
    # print(dict1)
    data_list = []
    for ap in all_path.get("data"):
        # if os.path.exists(ap.get("file_path")):
        data_list.append((ap.get("project_id"), ap.get("file_path"), ap.get("project_name")))
    print(data_list)
    # data_list = [(11, r"C:\Users\HUAWEI\PycharmProjects\nlp\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
    # NOTE(review): the hard-coded line below discards the list fetched from
    # the service above — presumably left over from local debugging; confirm
    # whether the service-driven list should be used instead.
    data_list = [(11, r"D:\ningda\dup_check2\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
    project_check(data_list)
@@ -0,0 +1,437 @@ | |||
# coding=utf-8 | |||
import sys | |||
import re | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import model_scope | |||
import pandas as pd | |||
import datetime | |||
import requests | |||
# 通过corom算法进行文本向量化对比相识度 | |||
# Excel dimension title (Chinese, as it appears in the upload) -> short
# database column code used across the user_data / user_history_data tables.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys"
}
# Reverse of wdys1: database column code -> Chinese dimension title.
wdys2 = {
    "xmmc": "项目名称",
    "xzwt": "现状问题",
    "xtjc": "系统基础",
    "xmmb": "项目目标",
    "yqjx": "预期绩效",
    "jsxq": "建设需求",
    "sjxq": "数据需求",
    "aqxq": "安全需求",
    "ywly": "业务领域",
    "hxyw": "核心业务",
    "ywxq": "业务需求",
    "ywxt": "业务协同",
    "jscj": "建设层级",
    "yhfw": "用户范围",
    "mbqt": "目标群体",
    "jsnr": "建设内容",
    "gnmk": "功能模块",
    "sjgx": "数据共享",
    "znys": "智能要素"
}
# Function-module field codes: gnmc = function name, gnms = function description.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述"
}
def getFlag():
    """Read per-dimension weight strings from column 1 of 0825.xlsx.

    Each non-null cell is expected to look like "<dimension>(NN%)" (full-width
    parentheses); the dimension title is mapped to its column code via wdys1
    and the "(NN%)" part is kept verbatim.

    Returns:
        dict: column code -> weight string such as "(30%)"; rows that do
        not match the pattern (or are not strings) are skipped.
    """
    data_dict = {}
    df = pd.read_excel("0825.xlsx")
    data = df.values
    data = list(pd.Series(data[:, 1]).dropna())
    for d in data:
        try:
            wd = re.search("(.*?)(.*?%)", d).group(1).strip()
            wdc = wdys1.get(wd)
            if wdc:
                qz = re.search(".*?((.*?%))", d).group(1)
                data_dict[wdc] = qz
        except (AttributeError, TypeError):
            # AttributeError: re.search found no match (None.group);
            # TypeError: cell is not a string. Anything else should surface
            # instead of being silently swallowed by a bare except.
            pass
    return data_dict
def gong_neng_mo_kuai(mysql, dl, data, er_title, similarity_nlp):
    """Store the uploaded project's function modules and compare each against
    every module of one hard-coded history project.

    Args:
        mysql: ConnMysql wrapper used for all reads/writes.
        dl: (project_id, excel_path, project_name) of the uploaded project.
        data: raw excel rows (DataFrame.values) of the uploaded project.
        er_title: set of second-level module titles found in the excel.
        similarity_nlp: Bert_nlp instance used for text similarity.
    """
    # Concatenate every third-level description cell (column 3) per module
    # title (column 1).
    str_dict = {}
    for et in er_title:
        for d in data:
            if d[1] == et:
                if str_dict.get(et):
                    str_dict[et] = str_dict.get(et) + d[3]
                else:
                    str_dict[et] = d[3]
    for k, v in str_dict.items():
        mysql.sql_change_msg(
            """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, "%s", "%s", "%s", "%s", "模块")""" % (
                int(dl[0]), k, v, str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    module_id_list = mysql.sql_select_many(
        """select project_module_id, module_name, module_content from idc_project_module where project_id=%d""" % dl[
            0])
    data_list = []
    for mil in module_id_list:
        data_list.append({
            "project_module_id": mil.get("project_module_id"),
            "gnmc": mil.get("module_name"),
            "gnms": mil.get("module_content"),
        })
    # print(data_list)
    # The history-module query is loop-invariant: fetch it once.
    gnmk_copy1 = mysql.sql_select_many("""select * from user_history_module_data WHERE xmmc = '丽水市城市管理指挥中心信息系统(一期)项目' """)
    for i in data_list:
        if gnmk_copy1:
            for gc in gnmk_copy1:
                # Build the statement once; the original duplicated the whole
                # SQL for the debug print and the execution (with timestamps
                # that could even differ between the two copies).
                insert_sql = (
                    """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, "%s", "%s", "%s", "%s", "%s")"""
                    % (
                        i.get("project_module_id"), escape_string(gc.get("gnmc")), escape_string(gc.get("xmmc")), "",
                        str(datetime.datetime.now())[:-7],
                        str(datetime.datetime.now())[:-7]))
                print(insert_sql)
                mysql.sql_change_msg(insert_sql)
                dup_module_id = mysql.cur.lastrowid
                check_module_info(mysql, gc, dl, i, dup_module_id, similarity_nlp)
def check_module_info(mysql, gc, dl, pro, dup_module_id, similarity_nlp):
    """Score one uploaded module (pro) against one history module (gc) and
    persist the detail rows plus the combined score.

    Only the module description (gnms) currently contributes to the score
    (embedding similarity scaled by 99); the module name (gnmc) comparison is
    deliberately skipped. Removed unused total_keywords locals and the dead
    commented-out gnmc branch from the original.
    """
    total_similarity1 = 0  # gnmc contribution — always 0 while names are skipped
    total_similarity2 = 0  # gnms contribution
    for j in ["gnmc", "gnms"]:
        # Compare each field of the module pair.
        content_x = gc.get(j)
        content_y = pro.get(j)
        if content_x and content_y:
            if j == "gnmc":
                print("功能名称 暂时不计算")
            else:
                # Embedding similarity of the two descriptions, weighted 99.
                similarity = similarity_nlp.main(content_x, content_y)
                similarity = similarity * 99
                total_similarity2 += similarity
                mysql.sql_change_msg(
                    """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s")"""
                    % (dup_module_id, dl[2], escape_string(content_y), escape_string(content_x), similarity,
                       "功能模块描述",
                       str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    mysql.sql_change_msg("""update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
        total_similarity1 + total_similarity2, dup_module_id))
def project_check(data_list):
    """Run duplicate checking for each project in data_list.

    Each entry of data_list is (project_id, excel_path, project_name).
    The excel sheet is parsed into a per-dimension text dict, stored into
    user_data, compared against historical projects (check_project_info),
    and finally per-module comparison is launched (gong_neng_mo_kuai).
    """
    similarity_nlp = model_scope.Bert_nlp("structbert")
    mysql = mysql_pool.ConnMysql()
    # Historical record counts, reported back onto idc_project below.
    xmnr_count = len(mysql.sql_select_many("""select * from user_history_data"""))
    gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data"""))
    # Iterate over the excel paths to be checked.
    for dl in data_list:
        # Load the spreadsheet for this project.
        print(dl,dl[1])
        df = pd.read_excel(dl[1])
        data = df.values
        # Concatenate the cell text of every dimension in the sheet.
        join_str = ""
        str_dict = {}
        gnmk_str = []
        title = ""
        er_title = set()
        for d in data:
            if pd.notnull(d[0]):
                # First column non-empty: this row starts a new dimension.
                title = d[0]
                if title == "功能模块":
                    er_title.add(d[1])
                join_str = ""
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                        if title == "功能模块":
                            if i == '功能描述':
                                continue
                            else:
                                gnmk_str.append(i)
                str_dict[wdys1.get(title)] = join_str
            else:
                # Continuation row: append to the current dimension's text.
                if title == "功能模块":
                    er_title.add(d[1])
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
        # print(str_dict)
        gnmk = ",".join(gnmk_str)
        str_dict['gnmk'] = gnmk
        # Persist the parsed dimension texts for this project.
        mysql.sql_change_msg(
            """insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % (dl[0], str_dict.get("xzwt") if str_dict.get("xzwt") else None,
               str_dict.get("xtjc") if str_dict.get("xtjc") else None,
               str_dict.get("xmmb") if str_dict.get("xmmb") else None,
               str_dict.get("yqjx") if str_dict.get("yqjx") else None,
               str_dict.get("jsxq") if str_dict.get("jsxq") else None,
               str_dict.get("sjxq") if str_dict.get("sjxq") else None,
               str_dict.get("aqxq") if str_dict.get("aqxq") else None,
               str_dict.get("ywly") if str_dict.get("ywly") else None,
               str_dict.get("hxyw") if str_dict.get("hxyw") else None,
               str_dict.get("ywxq") if str_dict.get("ywxq") else None,
               str_dict.get("ywxt") if str_dict.get("ywxt") else None,
               str_dict.get("jscj") if str_dict.get("jscj") else None,
               str_dict.get("yhfw") if str_dict.get("yhfw") else None,
               str_dict.get("mbqt") if str_dict.get("mbqt") else None,
               str_dict.get("jsnr") if str_dict.get("jsnr") else None,
               str_dict.get("gnmk") if str_dict.get("gnmk") else None,
               str_dict.get("sjgx") if str_dict.get("sjgx") else None,
               str_dict.get("znys") if str_dict.get("znys") else None))
        # Fetch historical projects to compare against.
        # NOTE(review): the WHERE clause pins a single project name — looks
        # like a debugging restriction; confirm before production use.
        xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data WHERE xmmc = '丽水市城市管理指挥中心信息系统(一期)项目' """)
        # Compare each historical row against the current project's dims.
        if xmnr_copy1:
            # threads = [Thread(target=check_project_info, args=(mysql, dl, xc, str_dict)) for xc in xmnr_copy1]
            # for t in threads:
            #     t.start()
            #
            # for t in threads:
            #     t.join()
            for xc in xmnr_copy1:
                check_project_info(mysql, dl, xc, str_dict, similarity_nlp)
        # Mark the project as checked and record the history counts.
        mysql.sql_change_msg(
            """update idc_project set dup_status=3, one_vote_veto_status=1, self_check_status=1, history_project_count=%d ,module_count=%d where project_id=%d""" % (
                xmnr_count, gnmk_count, dl[0]))
        gong_neng_mo_kuai(mysql, dl, data, er_title, similarity_nlp)
    # Release the pooled DB connection.
    mysql.release()
def check_project_info(mysql, dl, xc, str_dict, similarity_nlp):
    """Compare the current project (str_dict) against one historical project (xc).

    Inserts a parent row into idc_project_check, then walks every dimension
    shared by both projects, scoring each with similarity_nlp and writing a
    detail row.  The dimension weights depend on which of 'gnmk' (module
    text) and 'jsnr' (construction content) are populated:

      - gnmk empty,  jsnr present: gnmk*0,  jsnr*40, others*(60/dup_count)
      - jsnr empty,  gnmk present: gnmk*50, jsnr*0,  others*(50/dup_count)
      - otherwise:                 gnmk*50, jsnr*40, others*(10/dup_count)

    dup_count is the number of shared dimensions excluding gnmk/jsnr.
    NOTE(review): total_keywords is never populated in this version.
    """
    total_keywords = {}
    total_similarity = 0
    dup_count = 0
    # Insert the parent check row; its autoincrement id keys the detail rows.
    print(f'xmmc is {xc.get("xmmc")}')
    mysql.sql_change_msg(
        """insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
        % (dl[0], escape_string(xc.get("xmmc")), escape_string(dl[1]), "", "", "需求相似、业务相似", "历史项目", "",
           str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    dup_id = mysql.cur.lastrowid
    # First pass: count shared dimensions other than gnmk/jsnr.
    for x in list(xc.keys())[1:]:
        content_x = xc.get(x)
        content_y = str_dict.get(x)
        if content_x and content_y:
            if x == 'gnmk':
                continue
            elif x == 'jsnr':
                continue
            else:
                dup_count += 1
    # Branch 1: gnmk effectively empty on both sides, jsnr present on both.
    if ((xc['gnmk'] == 'None' or xc['gnmk'] is None or str.strip(xc['gnmk']) == '') and (str_dict['gnmk'] is None or str.strip(str_dict['gnmk']) == '')) and (
            not xc['jsnr'] is None and xc['jsnr'] != 'None' and not str_dict['jsnr'] is None and len(str.strip(str_dict['jsnr'])) > 0):
        for x in list(xc.keys())[1:]:
            content_x = xc.get(x)
            content_y = str_dict.get(x)
            if content_x and content_y:
                if x == 'gnmk':
                    # Module dimension carries zero weight in this branch.
                    similarity= similarity_nlp.main(content_x, content_y)
                    similarity = similarity * 0
                    # print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
                    # Accumulate similarity.
                    total_similarity += similarity
                    function_content = content_y
                    dup_function_content = content_x
                    # Persist this dimension's similarity to idc_project_check_detail.
                    mysql.sql_change_msg(
                        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                        % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                           str(datetime.datetime.now())[:-7]))
                    # content = content.replace(gjcs, f'<span class="similarity">{gjcs.strip()}</span>')
                elif x == 'jsnr':
                    # Construction content weighted 40.
                    similarity = similarity_nlp.main(content_x, content_y)
                    similarity = similarity * 40
                    # print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
                    # Accumulate similarity.
                    total_similarity += similarity
                    # Keyword collection (currently unused here).
                    function_content = content_y
                    dup_function_content = content_x
                    # Persist this dimension's similarity to idc_project_check_detail.
                    mysql.sql_change_msg(
                        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                        % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                           str(datetime.datetime.now())[:-7]))
                else:
                    # Remaining dimensions share a 60-point budget.
                    similarity = similarity_nlp.main(content_x, content_y)
                    similarity = similarity * (60 / dup_count)
                    # Accumulate similarity.
                    total_similarity += similarity
                    function_content = content_y
                    dup_function_content = content_x
                    # Persist this dimension's similarity to idc_project_check_detail.
                    mysql.sql_change_msg(
                        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                        % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                           str(datetime.datetime.now())[:-7]))
    # Branch 2: jsnr effectively empty on both sides, gnmk present on both.
    elif ((xc['jsnr'] == 'None' or xc['jsnr'] is None or str.strip(xc['jsnr']) == '') and (str_dict['jsnr'] is None or str.strip(str_dict['jsnr']) == '')) and (
            not xc['gnmk'] is None and xc['gnmk'] != 'None' and not str_dict['gnmk'] is None and len(str.strip(str_dict['gnmk'])) > 0):
        for x in list(xc.keys())[1:]:
            content_x = xc.get(x)
            content_y = str_dict.get(x)
            if content_x and content_y:
                if x == 'gnmk':
                    # Module dimension weighted 50.
                    similarity = similarity_nlp.main(content_x, content_y)
                    similarity = similarity * 50
                    # Accumulate similarity.
                    total_similarity += similarity
                    function_content = content_y
                    dup_function_content = content_x
                    # Persist this dimension's similarity to idc_project_check_detail.
                    mysql.sql_change_msg(
                        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                        % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                           str(datetime.datetime.now())[:-7]))
                elif x == 'jsnr':
                    # Construction content carries zero weight in this branch.
                    similarity = similarity_nlp.main(content_x, content_y)
                    similarity = similarity * 0
                    # Accumulate similarity.
                    total_similarity += similarity
                    function_content = content_y
                    dup_function_content = content_x
                    # Persist this dimension's similarity to idc_project_check_detail.
                    mysql.sql_change_msg(
                        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                        % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                           str(datetime.datetime.now())[:-7]))
                else:
                    # Remaining dimensions share a 50-point budget.
                    similarity = similarity_nlp.main(content_x, content_y)
                    similarity = similarity * (50 / dup_count)
                    # Accumulate similarity.
                    total_similarity += similarity
                    function_content = content_y
                    dup_function_content = content_x
                    # Persist this dimension's similarity to idc_project_check_detail.
                    mysql.sql_change_msg(
                        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                        % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                           str(datetime.datetime.now())[:-7]))
    # Branch 3: both gnmk and jsnr populated (or neither) — default weights.
    else:
        for x in list(xc.keys())[1:]:
            content_x = xc.get(x)
            content_y = str_dict.get(x)
            if content_x and content_y:
                if x == 'gnmk':
                    # Module dimension weighted 50.
                    similarity = similarity_nlp.main(content_x, content_y)
                    similarity = similarity * 50
                    # Accumulate similarity.
                    total_similarity += similarity
                    function_content = content_y
                    dup_function_content = content_x
                    # Persist this dimension's similarity to idc_project_check_detail.
                    mysql.sql_change_msg(
                        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                        % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                           str(datetime.datetime.now())[:-7]))
                    # content = content.replace(gjcs, f'<span class="similarity">{gjcs.strip()}</span>')
                elif x == 'jsnr':
                    # Construction content weighted 40.
                    similarity = similarity_nlp.main(content_x, content_y)
                    similarity = similarity * 40
                    # Accumulate similarity.
                    total_similarity += similarity
                    function_content = content_y
                    dup_function_content = content_x
                    # Persist this dimension's similarity to idc_project_check_detail.
                    mysql.sql_change_msg(
                        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                        % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                           str(datetime.datetime.now())[:-7]))
                else:
                    # Remaining dimensions share a 10-point budget.
                    similarity = similarity_nlp.main(content_x, content_y)
                    similarity = similarity * (10 / dup_count)
                    # Accumulate similarity.
                    total_similarity += similarity
                    function_content = content_y
                    dup_function_content = content_x
                    # Persist this dimension's similarity to idc_project_check_detail.
                    mysql.sql_change_msg(
                        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
                        % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                           str(datetime.datetime.now())[:-7]))
    # Write the summed score onto the parent check row.
    mysql.sql_change_msg(
        """update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id))
if __name__ == "__main__":
    # Fetch the list of projects pending duplicate check from the local service.
    all_path = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 599).json()
    # print(all_path)
    # dict1 = {k:v for k, v in sorted(dict.items(), key= lambda item : item[1])}
    # print(dict1)
    data_list = []
    for ap in all_path.get("data"):
        # if os.path.exists(ap.get("file_path")):
        data_list.append((ap.get("project_id"), ap.get("file_path"), ap.get("project_name")))
    print(data_list)
    # data_list = [(11, r"C:\Users\HUAWEI\PycharmProjects\nlp\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
    # NOTE(review): the fetched data_list is discarded and replaced with a
    # hard-coded local path — looks like leftover debugging; confirm intent.
    data_list = [(11, r"D:\ningda\dup_check2\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
    project_check(data_list)
# ===== second duplicate-check script (GLM-based variant) begins here =====
# coding=utf-8 | |||
import sys | |||
import re | |||
import baidu | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import cosin_similarity | |||
import pandas as pd | |||
import datetime | |||
import requests | |||
import glm_utils | |||
from threading import Thread | |||
# Mapping: Chinese dimension title (as it appears in the excel sheet)
# -> short field code used as a DB column name.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys"
}
# Reverse mapping: field code -> Chinese dimension title (used when
# writing human-readable dimension names into check-detail rows).
wdys2 = {
    "xmmc": "项目名称",
    "xzwt": "现状问题",
    "xtjc": "系统基础",
    "xmmb": "项目目标",
    "yqjx": "预期绩效",
    "jsxq": "建设需求",
    "sjxq": "数据需求",
    "aqxq": "安全需求",
    "ywly": "业务领域",
    "hxyw": "核心业务",
    "ywxq": "业务需求",
    "ywxt": "业务协同",
    "jscj": "建设层级",
    "yhfw": "用户范围",
    "mbqt": "目标群体",
    "jsnr": "建设内容",
    "gnmk": "功能模块",
    "sjgx": "数据共享",
    "znys": "智能要素"
}
# Module-level dimensions: field code -> Chinese label.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述"
}
def getFlag():
    """Parse dimension weights from 0825.xlsx.

    Column 1 cells contain a Chinese dimension name followed by a
    parenthesised percentage.  The name is mapped to its short field code
    via wdys1; the percentage text is kept verbatim.

    Returns:
        dict: {field_code: weight_string}; cells that fail to parse are skipped.
    """
    data_dict = {}
    df = pd.read_excel("0825.xlsx")
    data = df.values
    data = list(pd.Series(data[:, 1]).dropna())
    for d in data:
        try:
            wd = re.search("(.*?)(.*?%)", d).group(1).strip()
            wdc = wdys1.get(wd)
            if wdc:
                qz = re.search(".*?((.*?%))", d).group(1)
                data_dict[wdc] = qz
        except (AttributeError, TypeError):
            # AttributeError: regex did not match (search() returned None);
            # TypeError: cell is not a string.  The previous bare `except`
            # silently swallowed everything, including KeyboardInterrupt.
            pass
    return data_dict
def gong_neng_mo_kuai(mysql, dl, data, er_title):
    """Per-module duplicate checking for one project.

    Concatenates the third-level (module description) text for each module
    name in er_title, stores the modules into idc_project_module, compares
    each against historical modules (check_module_info), and records module
    keywords extracted via cosine similarity.
    """
    # Concatenate all third-level content per module name.
    str_dict = {}
    for et in er_title:
        for d in data:
            if d[1] == et:
                if str_dict.get(et):
                    str_dict[et] = str_dict.get(et) + d[3]
                else:
                    str_dict[et] = d[3]
    # Persist each module's concatenated content.
    for k, v in str_dict.items():
        mysql.sql_change_msg(
            """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, "%s", "%s", "%s", "%s", "模块")""" % (
                int(dl[0]), k, v, str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    module_id_list = mysql.sql_select_many(
        """select project_module_id, module_name, module_content from idc_project_module where project_id=%d""" % dl[
            0])
    data_list = []
    for mil in module_id_list:
        data_dict = {}
        data_dict["project_module_id"] = mil.get("project_module_id")
        data_dict["gnmc"] = mil.get("module_name")
        # data_dict["glm_desc"] = baidu.CallResult(mil.get("module_content"))
        data_dict["gnms"] = mil.get("module_content")
        # print(f'module_content = ({mil.get("module_content")}), glm_desc = ({data_dict["glm_desc"]})')
        data_list.append(data_dict)
    # print(data_list)
    for i in data_list:
        # NOTE(review): the WHERE clause pins a single historical project —
        # looks like a debugging restriction; confirm before production use.
        gnmk_copy1 = mysql.sql_select_many("""select * from user_history_module_data where xmmc = '南浔区信息化项目全生命周期管理系统'""")
        if gnmk_copy1:
            for gc in gnmk_copy1:
                # Debug echo of the SQL about to be executed.
                print(
                    """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, "%s", "%s", "%s", "%s", "%s")"""
                    % (
                        i.get("project_module_id"), escape_string(gc.get("gnmc")), escape_string(gc.get("xmmc")), "",
                        str(datetime.datetime.now())[:-7],
                        str(datetime.datetime.now())[:-7]))
                mysql.sql_change_msg(
                    """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, "%s", "%s", "%s", "%s", "%s")"""
                    % (
                        i.get("project_module_id"), escape_string(gc.get("gnmc")), escape_string(gc.get("xmmc")), "",
                        str(datetime.datetime.now())[:-7],
                        str(datetime.datetime.now())[:-7]))
                dup_module_id = mysql.cur.lastrowid
                check_module_info(mysql, gc, dl, i, dup_module_id)
        # Extract keywords per module dimension.
        # NOTE(review): content_x and content_y are the same text here — the
        # self-comparison is only used to obtain the keyword list.
        gnmk_gjc = {}
        for a in ["gnmc", "glm_desc"]:
            if i.get(a):
                content_x = i.get(a)
                content_y = i.get(a)
                if a == "gnmc":
                    similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                    # similarity score plus keyword lists
                    similarity, keyword_x, keywords = similarity.main()
                    # De-duplicate keywords.
                    keywords = list(set(keywords))
                    gnmk_gjc[a] = keywords
                else:
                    similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                    # similarity score plus keyword lists
                    similarity, keyword_x, keywords = similarity.main()
                    # De-duplicate keywords.
                    keywords = list(set(keywords))
                    gnmk_gjc[a] = keywords
        mysql.sql_change_msg("""insert into user_module_keywords (xmmc, gnmc, gnms) value("%s", "%s", "%s")""" % (
            dl[2], str(gnmk_gjc.get("gnmc"))[1:-1] if gnmk_gjc.get("gnmc") else None,
            str(gnmk_gjc.get("gnms"))[1:-1] if gnmk_gjc.get("gnms") else None))
def check_module_info(mysql, gc, dl, pro, dup_module_id):
    """Score one historical module (gc) against one project module (pro)
    using a GLM service (glm_utils.AutoDLResult) instead of local NLP.

    The model is prompted to report a duplication percentage for the two
    texts; the reply's number and explanation are stored per dimension, and
    the summed score is written back onto idc_project_module_check.

    NOTE(review): the name branch accumulates similarity/100 while the
    description branch accumulates similarity*0.99 — the stored detail rows
    use different scales; confirm this asymmetry is intended.
    """
    total_similarity1 = 0
    total_keywords1 = []
    total_similarity2 = 0
    total_keywords2 = []
    for j in ["gnmc", "gnms"]:
        # Iterate over the two compared dimensions (name / description).
        content_x = gc.get(j)
        content_y = pro.get(j)
        if content_x and content_y:
            if j == "gnmc":
                # similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                # 相似度 关键词
                # similarity, keyword_x, keywords = similarity.main()
                # similarity = similarity * 1
                # total_keywords1 += keywords
                # print("######################相似度: %.2f%%" % similarity, "关键词: %s" % keywords)
                similarity, check_desc = glm_utils.AutoDLResult(f"""告诉我下面两段话的重复率百分比是多少: \n第一段话是:'{content_y}', \n ----------------- \n 第二段话是:'{content_x}'""")
                # similarity, check_desc = baidu.CallResult(
                #     f"""告诉我下面两段话的重复率百分比是多少: 第一段话是:'{content_x}', ----------------- 第二段话是:'{content_y}'""")
                # A failed/unparseable model reply counts as zero similarity.
                if similarity is None:
                    similarity = 0
                print(f"similarity is {similarity}")
                total_similarity1 += similarity/100
                mysql.sql_change_msg(
                    """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time, check_desc) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s", "%s")"""
                    % (dup_module_id, dl[2], escape_string(content_y), escape_string(content_x), similarity,
                       "功能名称",
                       str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7], escape_string(check_desc)))
            else:
                # similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                # # 相似度 关键词
                # similarity, keyword_x, keywords = similarity.main()
                # similarity = similarity * 99
                # total_keywords2 += keywords
                similarity, check_desc = glm_utils.AutoDLResult(f"""告诉我下面两段话的重复率百分比是多少: \n第一段话是:'{content_y}', \n ----------------- \n 第二段话是:'{content_x}'""")
                # similarity, check_desc = baidu.CallResult(
                #     f"""告诉我下面两段话的重复率百分比是多少: 第一段话是:'{content_x}', ----------------- 第二段话是:'{content_y}'""")
                # 临时写入文件
                # check_desc = str(check_desc).replace("\n", " ")
                # prompt = f"""告诉我下面两段话的重复率百分比是多少: 第一段话是:'{content_x}', ----------------- 第二段话是:'{content_y}'"""
                # prompt = prompt.replace("\n", " ")
                # with open('train.json', 'a') as file:
                #     file.write("{" + f"""
                #     "content": "{prompt}",
                #     "summary": "{check_desc}"
                #     """ + "}\n")
                # A failed/unparseable model reply counts as zero similarity.
                if similarity is None:
                    similarity = 0
                print(f"similarity is {similarity}")
                similarity = similarity * 0.99
                # print("######################相似度: %.2f%%" % similarity, "关键词: %s" % keywords)
                # Accumulate the description-dimension score.
                total_similarity2 += similarity
                # module_content = pro.get("gnms") + "/n" + content_y
                # dup_module_content = gc.get("gnms") + "/n" + content_x
                module_content = pro.get("gnms")
                dup_module_content = gc.get("gnms")
                mysql.sql_change_msg(
                    """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time, check_desc) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s", "%s")"""
                    % (dup_module_id, dl[2], escape_string(module_content), escape_string(dup_module_content),
                       similarity,
                       "功能模块描述",
                       str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7],
                       escape_string(check_desc)))
    # Persist the combined similarity for this module pair.
    mysql.sql_change_msg("""update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
        total_similarity1 + total_similarity2, dup_module_id))
def project_check(data_list):
    """Run duplicate checking for each project in data_list (GLM variant).

    Each entry of data_list is (project_id, excel_path, project_name).
    Parses the excel into per-dimension text, stores it to user_data,
    compares against historical projects, extracts per-dimension keywords,
    then launches the per-module comparison.

    NOTE(review): get_data_dict is computed but never used, and unlike the
    other variant this function does not call mysql.release().
    """
    mysql = mysql_pool.ConnMysql()
    # mysql.sql_select_many("""select * from mkgjc""")
    # Historical record counts, reported back onto idc_project below.
    # xmnr_count = len(mysql.sql_select_many("""select * from xmnr_copy1"""))
    # gnmk_count = len(mysql.sql_select_many("""select * from gnmk_copy1"""))
    xmnr_count = len(mysql.sql_select_many("""select * from user_history_data"""))
    gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data"""))
    get_data_dict = getFlag()
    # Iterate over the excel paths to be checked.
    for dl in data_list:
        # path = "0825-丽水系统查重维度1.xlsx"
        # Load the spreadsheet for this project.
        print(dl,dl[1])
        df = pd.read_excel(dl[1])
        data = df.values
        # Concatenate the cell text of every dimension in the sheet.
        join_str = ""
        str_dict = {}
        gnmk_str = []
        title = ""
        er_title = set()
        for d in data:
            # if pd.notnull(d[0]):
            #     title = d[0]
            #     if title == "功能模块":
            #         er_title.add(d[1])
            #     join_str = ""
            #     for i in d[1:]:
            #         if pd.notnull(i):
            #             join_str += str(i)
            #     str_dict[wdys1.get(title)] = join_str
            if pd.notnull(d[0]):
                # First column non-empty: this row starts a new dimension.
                title = d[0]
                if title == "功能模块":
                    er_title.add(d[1])
                join_str = ""
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                        if title == "功能模块":
                            if i == '功能描述':
                                continue
                            else:
                                gnmk_str.append(i)
                str_dict[wdys1.get(title)] = join_str
            else:
                # Continuation row: append to the current dimension's text.
                if title == "功能模块":
                    er_title.add(d[1])
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
        # print(str_dict)
        gnmk = ",".join(gnmk_str)
        str_dict['gnmk'] = gnmk
        # Persist the parsed dimension texts for this project.
        mysql.sql_change_msg(
            """insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % (dl[0], str_dict.get("xzwt") if str_dict.get("xzwt") else None,
               str_dict.get("xtjc") if str_dict.get("xtjc") else None,
               str_dict.get("xmmb") if str_dict.get("xmmb") else None,
               str_dict.get("yqjx") if str_dict.get("yqjx") else None,
               str_dict.get("jsxq") if str_dict.get("jsxq") else None,
               str_dict.get("sjxq") if str_dict.get("sjxq") else None,
               str_dict.get("aqxq") if str_dict.get("aqxq") else None,
               str_dict.get("ywly") if str_dict.get("ywly") else None,
               str_dict.get("hxyw") if str_dict.get("hxyw") else None,
               str_dict.get("ywxq") if str_dict.get("ywxq") else None,
               str_dict.get("ywxt") if str_dict.get("ywxt") else None,
               str_dict.get("jscj") if str_dict.get("jscj") else None,
               str_dict.get("yhfw") if str_dict.get("yhfw") else None,
               str_dict.get("mbqt") if str_dict.get("mbqt") else None,
               str_dict.get("jsnr") if str_dict.get("jsnr") else None,
               str_dict.get("gnmk") if str_dict.get("gnmk") else None,
               str_dict.get("sjgx") if str_dict.get("sjgx") else None,
               str_dict.get("znys") if str_dict.get("znys") else None))
        # Fetch historical projects to compare against.
        # NOTE(review): the WHERE clause pins a single project name — looks
        # like a debugging restriction; confirm before production use.
        xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data WHERE xmmc = '南浔区信息化项目全生命周期管理系统'""")
        # Compare each historical row against the current project's dims.
        if xmnr_copy1:
            # threads = [Thread(target=check_project_info, args=(mysql, dl, xc, str_dict)) for xc in xmnr_copy1]
            # for t in threads:
            #     t.start()
            #
            # for t in threads:
            #     t.join()
            # pro_ths = []
            # for xc in xmnr_copy1:
            #     # check_project_info(mysql, dl, xc, str_dict)
            #     p = Thread(target=check_project_info, args=(mysql, dl, xc, str_dict))
            #     pro_ths.append(p)
            #     p.start()
            # for p in pro_ths:
            #     p.join()
            for xc in xmnr_copy1:
                check_project_info(mysql, dl, xc, str_dict)
        # Extract keywords per dimension.
        # NOTE(review): content_x and content_y are the same text here — the
        # self-comparison is only used to obtain the keyword list.
        project_gjc = {}
        for w in wdys2.keys():
            content_x = str_dict.get(w)
            content_y = str_dict.get(w)
            if content_x and content_y:
                # similarity score plus keyword lists for this dimension
                similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                similarity, keywords_x, keywords = similarity.main()
                # De-duplicate keywords.
                keywords = list(set(keywords))
                project_gjc[w] = keywords
        mysql.sql_change_msg(
            """insert into user_keyword (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % (dl[0], str(project_gjc.get("xzwt"))[1:-1] if project_gjc.get("xzwt") else None,
               str(project_gjc.get("xtjc"))[1:-1] if project_gjc.get("xtjc") else None,
               str(project_gjc.get("xmmb"))[1:-1] if project_gjc.get("xmmb") else None,
               str(project_gjc.get("yqjx"))[1:-1] if project_gjc.get("yqjx") else None,
               str(project_gjc.get("jsxq"))[1:-1] if project_gjc.get("jsxq") else None,
               str(project_gjc.get("sjxq"))[1:-1] if project_gjc.get("sjxq") else None,
               str(project_gjc.get("aqxq"))[1:-1] if project_gjc.get("aqxq") else None,
               str(project_gjc.get("ywly"))[1:-1] if project_gjc.get("ywly") else None,
               str(project_gjc.get("hxyw"))[1:-1] if project_gjc.get("hxyw") else None,
               str(project_gjc.get("ywxq"))[1:-1] if project_gjc.get("ywxq") else None,
               str(project_gjc.get("ywxt"))[1:-1] if project_gjc.get("ywxt") else None,
               str(project_gjc.get("jscj"))[1:-1] if project_gjc.get("jscj") else None,
               str(project_gjc.get("yhfw"))[1:-1] if project_gjc.get("yhfw") else None,
               str(project_gjc.get("mbqt"))[1:-1] if project_gjc.get("mbqt") else None,
               str(project_gjc.get("jsnr"))[1:-1] if project_gjc.get("jsnr") else None,
               str(project_gjc.get("gnmk"))[1:-1] if project_gjc.get("gnmk") else None,
               str(project_gjc.get("sjgx"))[1:-1] if project_gjc.get("sjgx") else None,
               str(project_gjc.get("znys"))[1:-1] if project_gjc.get("znys") else None))
        # Mark the project as checked and record the history counts.
        mysql.sql_change_msg(
            """update idc_project set dup_status=3, one_vote_veto_status=1, self_check_status=1, history_project_count=%d ,module_count=%d where project_id=%d""" % (
                xmnr_count, gnmk_count, dl[0]))
        gong_neng_mo_kuai(mysql, dl, data, er_title)
def check_project_info(mysql, dl, xc, str_dict):
    """Compare one historical project row against the current project and persist results.

    Args:
        mysql: DB helper exposing sql_change_msg and cur.lastrowid.
        dl: tuple (project_id, file_path, project_name) of the current project.
        xc: dict for one historical project; first key is the project name
            (xmmc), remaining keys are dimension codes (see wdys2).
        str_dict: dict mapping dimension code -> concatenated text of the
            current project.

    Side effects:
        Inserts one idc_project_check header row, one idc_project_check_detail
        row per dimension present on both sides, then updates the header row
        with the summed weighted similarity. Returns None.
    """

    def _hist_blank(v):
        # Historical values count as empty when missing, the literal string
        # 'None', or whitespace-only (raises TypeError for non-str, as before).
        return v == 'None' or v is None or str.strip(v) == ''

    def _cur_blank(v):
        # Current-project values only get the None/whitespace checks — the
        # original branch conditions did not test for the 'None' literal here.
        return v is None or str.strip(v) == ''

    def _present(hist, cur):
        # "Both sides have usable text" test used to pick the weight scheme.
        return hist is not None and hist != 'None' and cur is not None and len(str.strip(cur)) > 0

    def _highlight(text, keywords):
        # Wrap each keyword occurrence in a highlight span. Double quotes are
        # normalised to single quotes so the text survives embedding in the
        # raw SQL string below.
        for word in keywords:
            word = word.strip().strip("'").strip('"')
            if word != '':
                text = str(text.replace("\"", "'")).replace(
                    word, f'<span class="similarity">{word.strip()}</span>')
        return text

    total_keywords = {}
    total_similarity = 0
    dup_count = 0
    # Header row first: the per-dimension detail rows reference its id.
    mysql.sql_change_msg(
        """insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
        % (dl[0], escape_string(xc.get("xmmc")), escape_string(dl[1]), "", "", "需求相似、业务相似", "历史项目", "",
           str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    dup_id = mysql.cur.lastrowid
    # Count the "ordinary" dimensions (everything except gnmk/jsnr) present on
    # both sides; the ordinary weight share is split evenly among them.
    for dim in list(xc.keys())[1:]:
        if dim in ('gnmk', 'jsnr'):
            continue
        if xc.get(dim) and str_dict.get(dim):
            dup_count += 1
    # Pick the weight scheme. gnmk/jsnr carry fixed weights; `shared` is the
    # total weight divided among the ordinary dimensions. `verbose` lists the
    # dimensions whose similarity is printed (debug parity with the original).
    if (_hist_blank(xc['gnmk']) and _cur_blank(str_dict['gnmk'])
            and _present(xc['jsnr'], str_dict['jsnr'])):
        # No functional-module text on either side but jsnr exists: shift all
        # weight to jsnr and the ordinary dimensions.
        weights, shared, verbose = {'gnmk': 0, 'jsnr': 40}, 60, ()
    elif (_hist_blank(xc['jsnr']) and _cur_blank(str_dict['jsnr'])
            and _present(xc['gnmk'], str_dict['gnmk'])):
        # No construction-content text but gnmk exists: mirror case.
        weights, shared, verbose = {'gnmk': 50, 'jsnr': 0}, 50, ()
    else:
        # Both special dimensions usable (or mixed): default weighting.
        weights, shared, verbose = {'gnmk': 50, 'jsnr': 40}, 10, ('gnmk', 'jsnr')
    for x in list(xc.keys())[1:]:
        content_x = xc.get(x)
        content_y = str_dict.get(x)
        if not (content_x and content_y):
            continue
        # dup_count >= 1 whenever an ordinary dimension reaches this line,
        # so the division below cannot divide by zero.
        weight = weights[x] if x in weights else shared / dup_count
        sim = cosin_similarity.CosineSimilarity(content_x, content_y)
        # similarity score plus the keywords found on each side
        similarity, keywords_x, keywords_y = sim.main()
        similarity = similarity * weight
        total_similarity += similarity
        # de-duplicate keywords before highlighting / collecting
        keywords_y = list(set(keywords_y))
        keywords_x = list(set(keywords_x))
        if x in verbose:
            print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
        total_keywords[x] = keywords_y
        function_content = _highlight(content_y, keywords_y)
        dup_function_content = _highlight(content_x, keywords_x)
        # One detail row per dimension with the highlighted texts.
        mysql.sql_change_msg(
            """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
            % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
               escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
               str(datetime.datetime.now())[:-7]))
    # Store the summed weighted similarity on the header row.
    mysql.sql_change_msg(
        """update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id))
if __name__ == "__main__":
    # Fetch the candidate projects for duplicate checking from the local service.
    all_path = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 599).json()
    data_list = [
        (ap.get("project_id"), ap.get("file_path"), ap.get("project_name"))
        for ap in all_path.get("data")
    ]
    print(data_list)
    # NOTE(review): the API result above is discarded by this hardcoded
    # override — presumably a local debugging shortcut; confirm before shipping.
    data_list = [(11, r"D:\ningda\dup_check2\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
    project_check(data_list)
# @@ -0,0 +1,675 @@
# coding=utf-8 | |||
import sys | |||
import re | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import cosin_similarity | |||
import pandas as pd | |||
import datetime | |||
import requests | |||
# Dimension label (Chinese) -> internal column code used in the SQL tables.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys"
}
# Reverse lookup: internal column code -> dimension label. wdys1 is a
# bijection, so inverting it yields exactly the same mapping the original
# spelled out by hand.
wdys2 = {code: label for label, code in wdys1.items()}
# Function-module sub-dimension labels.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述"
}
def getFlag(path="0825.xlsx"):
    """Read the dimension/weight sheet and return {dimension_code: weight_str}.

    Args:
        path: Excel file to read; defaults to the original hardcoded "0825.xlsx"
            so existing callers are unaffected.

    Returns:
        dict mapping an internal dimension code (see wdys1) to the weight
        string extracted from the cell, e.g. "(10%)". Rows whose label is not
        a known dimension, or that do not match the pattern, are skipped.
    """
    data_dict = {}
    df = pd.read_excel(path)
    data = df.values
    # Second column holds "label(weight%)" cells; drop empty ones.
    data = list(pd.Series(data[:, 1]).dropna())
    for d in data:
        try:
            # Label is everything before the full-width parenthesised weight.
            wd = re.search("(.*?)(.*?%)", d).group(1).strip()
            wdc = wdys1.get(wd)
            if wdc:
                qz = re.search(".*?((.*?%))", d).group(1)
                data_dict[wdc] = qz
        except (AttributeError, TypeError):
            # re.search returns None for non-matching rows (-> .group raises
            # AttributeError); non-string cells raise TypeError. Skip only
            # those instead of the original bare `except: pass`, which hid
            # every other error.
            pass
    return data_dict
def gong_neng_mo_kuai(mysql, dl, data, er_title):
    """Persist the current project's function modules and check them against history.

    Args:
        mysql: DB helper exposing sql_change_msg / sql_select_many / cur.lastrowid.
        dl: tuple (project_id, file_path, project_name) of the current project.
        data: raw cell rows of the project's Excel sheet (df.values).
        er_title: set of second-level module names collected from the sheet.

    Side effects: inserts idc_project_module, idc_project_module_check and
    user_module_keywords rows, and delegates per-module scoring to
    check_module_info.
    """
    # Concatenate every third-level content cell (column 3) that belongs to
    # the same module name (column 1) into one text per module.
    str_dict = {}
    for et in er_title:
        for d in data:
            if d[1] == et:
                if str_dict.get(et):
                    str_dict[et] = str_dict.get(et) + d[3]
                else:
                    str_dict[et] = d[3]
    # One idc_project_module row per module of the current project.
    for k, v in str_dict.items():
        mysql.sql_change_msg(
            """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, "%s", "%s", "%s", "%s", "模块")""" % (
                int(dl[0]), k, v, str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    # Read the rows back to obtain their auto-increment ids.
    module_id_list = mysql.sql_select_many(
        """select project_module_id, module_name, module_content from idc_project_module where project_id=%d""" % dl[
            0])
    data_list = []
    for mil in module_id_list:
        data_dict = {}
        data_dict["project_module_id"] = mil.get("project_module_id")
        data_dict["gnmc"] = mil.get("module_name")
        data_dict["gnms"] = mil.get("module_content")
        data_list.append(data_dict)
    # print(data_list)
    for i in data_list:
        # NOTE(review): this query re-reads the whole history table for every
        # module; it looks loop-invariant and could be hoisted — confirm.
        gnmk_copy1 = mysql.sql_select_many("""select * from user_history_module_data""")
        if gnmk_copy1:
            for gc in gnmk_copy1:
                # NOTE(review): debug print of the exact SQL executed just below.
                print(
                    """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, "%s", "%s", "%s", "%s", "%s")"""
                    % (
                        i.get("project_module_id"), escape_string(gc.get("gnmc")), escape_string(gc.get("xmmc")), "",
                        str(datetime.datetime.now())[:-7],
                        str(datetime.datetime.now())[:-7]))
                mysql.sql_change_msg(
                    """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, "%s", "%s", "%s", "%s", "%s")"""
                    % (
                        i.get("project_module_id"), escape_string(gc.get("gnmc")), escape_string(gc.get("xmmc")), "",
                        str(datetime.datetime.now())[:-7],
                        str(datetime.datetime.now())[:-7]))
                dup_module_id = mysql.cur.lastrowid
                check_module_info(mysql, gc, dl, i, dup_module_id)
            # Extract keywords of the module's own name/description by running
            # the similarity engine on the text against itself.
            gnmk_gjc = {}
            for a in ["gnmc", "gnms"]:
                if i.get(a):
                    content_x = i.get(a)
                    content_y = i.get(a)
                    if a == "gnmc":
                        similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                        # similarity score and keywords
                        similarity, keyword_x, keywords = similarity.main()
                        # de-duplicate
                        keywords = list(set(keywords))
                        gnmk_gjc[a] = keywords
                    else:
                        similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                        # similarity score and keywords
                        similarity, keyword_x, keywords = similarity.main()
                        # de-duplicate
                        keywords = list(set(keywords))
                        gnmk_gjc[a] = keywords
            # str(...)[1:-1] strips the list brackets before storing.
            mysql.sql_change_msg("""insert into user_module_keywords (xmmc, gnmc, gnms) value("%s", "%s", "%s")""" % (
                dl[2], str(gnmk_gjc.get("gnmc"))[1:-1] if gnmk_gjc.get("gnmc") else None,
                str(gnmk_gjc.get("gnms"))[1:-1] if gnmk_gjc.get("gnms") else None))
def check_module_info(mysql, gc, dl, pro, dup_module_id):
    """Score one historical module (gc) against one project module (pro).

    Args:
        mysql: DB helper exposing sql_change_msg.
        gc: dict of the historical module row ("gnmc" name, "gnms" description).
        dl: tuple (project_id, file_path, project_name); dl[2] is stored as
            the project name on the detail rows.
        pro: dict of the current project's module ("gnmc"/"gnms").
        dup_module_id: id of the idc_project_module_check row to update.

    Side effects: inserts one idc_project_module_check_detail row per
    comparable sub-dimension and writes the summed weighted similarity back to
    idc_project_module_check.
    """
    # sub-dimension code -> (human-readable label, similarity weight).
    # The description carries nearly all of the weight (99 vs 1), matching the
    # original branch-per-field implementation this table replaces.
    dimensions = {"gnmc": ("功能名称", 1), "gnms": ("功能模块描述", 99)}
    total_similarity = 0
    for code, (label, weight) in dimensions.items():
        content_x = gc.get(code)
        content_y = pro.get(code)
        if not (content_x and content_y):
            continue
        sim = cosin_similarity.CosineSimilarity(content_x, content_y)
        # similarity score plus keywords (keywords are not used here)
        similarity, _keyword_x, _keywords = sim.main()
        similarity = similarity * weight
        total_similarity += similarity
        mysql.sql_change_msg(
            """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s")"""
            % (dup_module_id, dl[2], escape_string(content_y), escape_string(content_x), similarity,
               label,
               str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    # Store the combined similarity (name + description) on the check row.
    mysql.sql_change_msg("""update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
        total_similarity, dup_module_id))
def project_check(data_list):
    """Run the full duplicate check for each project Excel file in data_list.

    Args:
        data_list: list of tuples (project_id, file_path, project_name).

    For each file: parse the per-dimension texts, store them in user_data,
    compare against every historical project (check_project_info), extract
    per-dimension keywords into user_keyword, mark the project as checked in
    idc_project, and finally process its function modules (gong_neng_mo_kuai).
    """
    mysql = mysql_pool.ConnMysql()
    # mysql.sql_select_many("""select * from mkgjc""")
    # Read dimensions and weights.
    # xmnr_count = len(mysql.sql_select_many("""select * from xmnr_copy1"""))
    # gnmk_count = len(mysql.sql_select_many("""select * from gnmk_copy1"""))
    xmnr_count = len(mysql.sql_select_many("""select * from user_history_data"""))
    gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data"""))
    # NOTE(review): the weight table read here is never used in this function.
    get_data_dict = getFlag()
    # Iterate over the Excel file paths.
    for dl in data_list:
        # path = "0825-丽水系统查重维度1.xlsx"
        # Read the Excel file at this path.
        print(dl,dl[1])
        df = pd.read_excel(dl[1])
        data = df.values
        # Concatenate every dimension's content cells found in the sheet.
        join_str = ""
        str_dict = {}
        gnmk_str = []
        title = ""
        er_title = set()
        for d in data:
            # if pd.notnull(d[0]):
            #     title = d[0]
            #     if title == "功能模块":
            #         er_title.add(d[1])
            #     join_str = ""
            #     for i in d[1:]:
            #         if pd.notnull(i):
            #             join_str += str(i)
            #     str_dict[wdys1.get(title)] = join_str
            # A non-empty first cell starts a new dimension section.
            if pd.notnull(d[0]):
                title = d[0]
                if title == "功能模块":
                    er_title.add(d[1])
                join_str = ""
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                        # For the module dimension, also collect the module
                        # names (skipping the "功能描述" header cell).
                        if title == "功能模块":
                            if i == '功能描述':
                                continue
                            else:
                                gnmk_str.append(i)
                str_dict[wdys1.get(title)] = join_str
            else:
                # Continuation row: append to the current dimension's text.
                # NOTE(review): join_str is NOT reset here, so text from the
                # previous row is appended again — looks like a bug; confirm.
                if title == "功能模块":
                    er_title.add(d[1])
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
        # print(str_dict)
        gnmk = ",".join(gnmk_str)
        str_dict['gnmk'] = gnmk
        # Persist the parsed dimension texts for this project.
        mysql.sql_change_msg(
            """insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % (dl[0], str_dict.get("xzwt") if str_dict.get("xzwt") else None,
               str_dict.get("xtjc") if str_dict.get("xtjc") else None,
               str_dict.get("xmmb") if str_dict.get("xmmb") else None,
               str_dict.get("yqjx") if str_dict.get("yqjx") else None,
               str_dict.get("jsxq") if str_dict.get("jsxq") else None,
               str_dict.get("sjxq") if str_dict.get("sjxq") else None,
               str_dict.get("aqxq") if str_dict.get("aqxq") else None,
               str_dict.get("ywly") if str_dict.get("ywly") else None,
               str_dict.get("hxyw") if str_dict.get("hxyw") else None,
               str_dict.get("ywxq") if str_dict.get("ywxq") else None,
               str_dict.get("ywxt") if str_dict.get("ywxt") else None,
               str_dict.get("jscj") if str_dict.get("jscj") else None,
               str_dict.get("yhfw") if str_dict.get("yhfw") else None,
               str_dict.get("mbqt") if str_dict.get("mbqt") else None,
               str_dict.get("jsnr") if str_dict.get("jsnr") else None,
               str_dict.get("gnmk") if str_dict.get("gnmk") else None,
               str_dict.get("sjgx") if str_dict.get("sjgx") else None,
               str_dict.get("znys") if str_dict.get("znys") else None))
        # Fetch every historical project row.
        xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data""")
        # Compare the current project against each historical project.
        if xmnr_copy1:
            # threads = [Thread(target=check_project_info, args=(mysql, dl, xc, str_dict)) for xc in xmnr_copy1]
            # for t in threads:
            #     t.start()
            #
            # for t in threads:
            #     t.join()
            for xc in xmnr_copy1:
                check_project_info(mysql, dl, xc, str_dict)
        # Extract per-dimension keywords by comparing each text with itself.
        project_gjc = {}
        for w in wdys2.keys():
            content_x = str_dict.get(w)
            content_y = str_dict.get(w)
            if content_x and content_y:
                # Iterate over every dimension.
                similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                # similarity score and keywords
                similarity, keywords_x, keywords = similarity.main()
                # de-duplicate
                keywords = list(set(keywords))
                project_gjc[w] = keywords
        # str(...)[1:-1] strips the list brackets before storing.
        mysql.sql_change_msg(
            """insert into user_keyword (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % (dl[0], str(project_gjc.get("xzwt"))[1:-1] if project_gjc.get("xzwt") else None,
               str(project_gjc.get("xtjc"))[1:-1] if project_gjc.get("xtjc") else None,
               str(project_gjc.get("xmmb"))[1:-1] if project_gjc.get("xmmb") else None,
               str(project_gjc.get("yqjx"))[1:-1] if project_gjc.get("yqjx") else None,
               str(project_gjc.get("jsxq"))[1:-1] if project_gjc.get("jsxq") else None,
               str(project_gjc.get("sjxq"))[1:-1] if project_gjc.get("sjxq") else None,
               str(project_gjc.get("aqxq"))[1:-1] if project_gjc.get("aqxq") else None,
               str(project_gjc.get("ywly"))[1:-1] if project_gjc.get("ywly") else None,
               str(project_gjc.get("hxyw"))[1:-1] if project_gjc.get("hxyw") else None,
               str(project_gjc.get("ywxq"))[1:-1] if project_gjc.get("ywxq") else None,
               str(project_gjc.get("ywxt"))[1:-1] if project_gjc.get("ywxt") else None,
               str(project_gjc.get("jscj"))[1:-1] if project_gjc.get("jscj") else None,
               str(project_gjc.get("yhfw"))[1:-1] if project_gjc.get("yhfw") else None,
               str(project_gjc.get("mbqt"))[1:-1] if project_gjc.get("mbqt") else None,
               str(project_gjc.get("jsnr"))[1:-1] if project_gjc.get("jsnr") else None,
               str(project_gjc.get("gnmk"))[1:-1] if project_gjc.get("gnmk") else None,
               str(project_gjc.get("sjgx"))[1:-1] if project_gjc.get("sjgx") else None,
               str(project_gjc.get("znys"))[1:-1] if project_gjc.get("znys") else None))
        # Mark the project as fully checked and record history/module counts.
        mysql.sql_change_msg(
            """update idc_project set dup_status=3, one_vote_veto_status=1, self_check_status=1, history_project_count=%d ,module_count=%d where project_id=%d""" % (
                xmnr_count, gnmk_count, dl[0]))
        gong_neng_mo_kuai(mysql, dl, data, er_title)
def _dim_blank_hist(v):
    # Historical rows (user_history_data) store missing values either as SQL
    # NULL (None) or as the literal string 'None'.
    return v == 'None' or v is None or str.strip(v) == ''


def _dim_blank_new(v):
    # The freshly parsed excel dict only ever holds None or real strings.
    return v is None or str.strip(v) == ''


def _dim_filled_hist(v):
    # NOTE: deliberately does NOT strip — mirrors the original condition,
    # which treated a whitespace-only historical value as "filled".
    return v is not None and v != 'None'


def _dim_filled_new(v):
    return v is not None and len(str.strip(v)) > 0


def _check_dimension(mysql, dup_id, dimension, content_x, content_y, weight, total_keywords):
    """Score one dimension and persist one idc_project_check_detail row.

    :param mysql: project DB wrapper (sql_change_msg)
    :param dup_id: idc_project_check row the detail row attaches to
    :param dimension: dimension column code (key of wdys2)
    :param content_x: historical project's text for this dimension
    :param content_y: new project's text for this dimension
    :param weight: multiplier applied to the raw cosine similarity
    :param total_keywords: out-param dict collecting keywords per dimension
    :return: the weighted similarity contribution of this dimension
    """
    sim = cosin_similarity.CosineSimilarity(content_x, content_y)
    similarity, keywords_x, keywords_y = sim.main()
    similarity = similarity * weight
    # De-duplicate the keyword lists (set order is not significant downstream).
    keywords_y = list(set(keywords_y))
    keywords_x = list(set(keywords_x))
    total_keywords[dimension] = keywords_y
    # Wrap every matched keyword in a highlight span for front-end display.
    function_content = content_y
    dup_function_content = content_x
    for word_y in keywords_y:
        word_y = word_y.strip().strip("'").strip('"')
        if word_y != '':
            function_content = str(function_content.replace("\"", "'")).replace(
                word_y, f'<span class="similarity">{word_y.strip()}</span>')
    for word_x in keywords_x:
        word_x = word_x.strip().strip("'").strip('"')
        if word_x != '':
            dup_function_content = str(dup_function_content.replace("\"", "'")).replace(
                word_x, f'<span class="similarity">{word_x.strip()}</span>')
    # NOTE(review): SQL is built with %-interpolation like the rest of this
    # module; escape_string guards quotes, but parameterized queries would be
    # safer — confirm mysql.sql_change_msg supports them before switching.
    mysql.sql_change_msg(
        """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, "%s", %f, "%s", "%s", "%s", "%s")"""
        % (dup_id, wdys2.get(dimension), similarity, escape_string(function_content),
           escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
           str(datetime.datetime.now())[:-7]))
    return similarity


def check_project_info(mysql, dl, xc, str_dict):
    """Compare the new project (*str_dict*) against one historical project (*xc*).

    Inserts one idc_project_check row, one idc_project_check_detail row per
    comparable dimension, then stores the weighted total similarity.

    Weighting scheme (total weight = 100):
      * both gnmk blank, both jsnr filled -> jsnr x40, other dims share 60
      * both jsnr blank, both gnmk filled -> gnmk x50, other dims share 50
      * otherwise                         -> gnmk x50, jsnr x40, others share 10

    This replaces three near-identical copy-pasted branches in the original;
    per-branch debug prints were dropped.
    """
    total_keywords = {}  # dimension code -> de-duplicated keywords (collected, not consumed here)
    total_similarity = 0
    dup_count = 0  # number of comparable dimensions other than gnmk/jsnr
    print(f'xmmc is {xc.get("xmmc")}')
    mysql.sql_change_msg(
        """insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
        % (dl[0], escape_string(xc.get("xmmc")), escape_string(dl[1]), "", "", "需求相似、业务相似", "历史项目", "",
           str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    dup_id = mysql.cur.lastrowid
    # First pass: count the "other" dimensions present on both sides so their
    # weight share can be split evenly among them.
    for x in list(xc.keys())[1:]:
        if x in ('gnmk', 'jsnr'):
            continue
        if xc.get(x) and str_dict.get(x):
            dup_count += 1
    # Pick the weight split according to which content dimensions are usable.
    if (_dim_blank_hist(xc['gnmk']) and _dim_blank_new(str_dict['gnmk'])
            and _dim_filled_hist(xc['jsnr']) and _dim_filled_new(str_dict['jsnr'])):
        gnmk_w, jsnr_w, rest_total = 0, 40, 60
    elif (_dim_blank_hist(xc['jsnr']) and _dim_blank_new(str_dict['jsnr'])
            and _dim_filled_hist(xc['gnmk']) and _dim_filled_new(str_dict['gnmk'])):
        gnmk_w, jsnr_w, rest_total = 50, 0, 50
    else:
        gnmk_w, jsnr_w, rest_total = 50, 40, 10
    # Second pass: score every dimension present on both sides.
    for x in list(xc.keys())[1:]:
        content_x = xc.get(x)
        content_y = str_dict.get(x)
        if not (content_x and content_y):
            continue
        if x == 'gnmk':
            weight = gnmk_w
        elif x == 'jsnr':
            weight = jsnr_w
        else:
            # dup_count >= 1 here: this very dimension was counted above.
            weight = rest_total / dup_count
        total_similarity += _check_dimension(mysql, dup_id, x, content_x,
                                             content_y, weight, total_keywords)
    # Save the summed similarity back onto the idc_project_check row.
    mysql.sql_change_msg(
        """update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id))
if __name__ == "__main__":
    # Fetch the list of projects to check from the local duplicate-check service.
    all_path = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 599).json()
    # print(all_path)
    # dict1 = {k:v for k, v in sorted(dict.items(), key= lambda item : item[1])}
    # print(dict1)
    data_list = []
    for ap in all_path.get("data"):
        # if os.path.exists(ap.get("file_path")):
        data_list.append((ap.get("project_id"), ap.get("file_path"), ap.get("project_name")))
    print(data_list)
    # data_list = [(11, r"C:\Users\HUAWEI\PycharmProjects\nlp\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
    # NOTE(review): the fetched data_list is discarded and replaced by a
    # hard-coded local fixture below — looks like leftover debug code; confirm
    # before running against production data.
    data_list = [(11, r"D:\ningda\dup_check2\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
    project_check(data_list)
# @@ -0,0 +1,550 @@  (stray diff hunk header left over from a patch paste — not code)
# coding=utf-8 | |||
import sys | |||
import re | |||
import baidu | |||
import model_scope | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import cosin_similarity | |||
import pandas as pd | |||
import datetime | |||
import requests | |||
import glm_utils | |||
from threading import Thread | |||
# First-level dimension mapping: Chinese label -> DB column code.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys"
}
# Inverse mapping (column code -> Chinese label), derived from wdys1 so the
# two tables can never drift apart.  Insertion order matches the original
# hand-written literal.
wdys2 = {v: k for k, v in wdys1.items()}
# Second-level (functional-module) field codes -> Chinese labels.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述"
}
def getFlag(path="0825.xlsx"):
    """Read the dimension weight settings from an excel sheet.

    Column 1 cells are expected to look like "<维度> (NN%)"; the dimension
    label is mapped to its column code via wdys1 and the "(NN%)" weight
    string is stored under that code.

    :param path: excel file to read (default keeps the historical behavior)
    :return: dict mapping dimension code -> weight string such as "(30%)"
    """
    data_dict = {}
    df = pd.read_excel(path)
    data = df.values
    data = list(pd.Series(data[:, 1]).dropna())
    for d in data:
        try:
            wd = re.search("(.*?)(.*?%)", d).group(1).strip()
            wdc = wdys1.get(wd)
            if wdc:
                qz = re.search(".*?((.*?%))", d).group(1)
                data_dict[wdc] = qz
        except (AttributeError, TypeError):
            # Cell does not match the expected pattern (re.search returned
            # None, or the value is not a string) — skip it.  This keeps the
            # original best-effort behavior but no longer swallows every
            # exception with a bare `except`.
            pass
    return data_dict
def gong_neng_mo_kuai(mysql, dl, data, er_title, str_dict_new):
    """Check every functional module of the new project against the
    historical module corpus and persist the results.

    :param mysql: project DB wrapper (sql_change_msg / sql_select_many / cur)
    :param dl: (project_id, excel_path, project_name) of the new project
    :param data: raw excel rows (array of sheet values)
    :param er_title: set of second-level module titles found in the sheet
    :param str_dict_new: dimension-code -> concatenated text of the new
        project (unused here; kept for interface compatibility)
    """
    # NOTE: the original instantiated model_scope.Bert_nlp("corom") here but
    # never used it; dropped to avoid loading the model for nothing.
    # Concatenate all third-dimension texts (column 3) under each module title.
    str_dict = {}
    for et in er_title:
        for d in data:
            if d[1] == et:
                if str_dict.get(et):
                    str_dict[et] = str_dict.get(et) + d[3]
                else:
                    str_dict[et] = d[3]
    # Persist one idc_project_module row per module of the new project.
    for k, v in str_dict.items():
        mysql.sql_change_msg(
            """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, "%s", "%s", "%s", "%s", "模块")""" % (
                int(dl[0]), k, v, str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    module_id_list = mysql.sql_select_many(
        """select project_module_id, module_name, module_content from idc_project_module where project_id=%d""" % dl[
            0])
    data_list = []
    for mil in module_id_list:
        data_list.append({
            "project_module_id": mil.get("project_module_id"),
            "gnmc": mil.get("module_name"),
            "gnms": mil.get("module_content"),
        })
    # The historical module corpus does not change per module: fetch it once
    # instead of re-querying inside the loop as the original code did.
    gnmk_copy1 = mysql.sql_select_many("""select * from user_history_module_data """)
    if not gnmk_copy1:
        return
    for i in data_list:
        for gc in gnmk_copy1:
            # Ask the LLM for a textual duplication analysis, then derive a
            # numeric similarity from it.
            desc = glm_utils.qwenResult(i.get("gnms"), gc.get("gnms"))
            similarity_result, count = similarity_result_check(desc)
            similarity = count
            mysql.sql_change_msg(
                """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time, similarity_result) value(%d, "%s", "%s", "%s", "%s", "%s", "%s")"""
                % (
                    i.get("project_module_id"), escape_string(gc.get("gnmc")), escape_string(gc.get("xmmc")), "",
                    str(datetime.datetime.now())[:-7],
                    str(datetime.datetime.now())[:-7], similarity_result))
            dup_module_id = mysql.cur.lastrowid
            # Store the per-field (name/description) detail rows.
            check_module_info(mysql, gc, dl, i, dup_module_id, similarity)
def check_module_info(mysql, gc, dl, pro, dup_module_id, score):
    """Persist per-field (name / description) similarity details for one
    module-vs-historical-module comparison, then store their sum on the
    idc_project_module_check row.

    :param mysql: project DB wrapper (sql_change_msg)
    :param gc: historical module row (keys gnmc / gnms / xmmc)
    :param dl: (project_id, excel_path, project_name) of the new project
    :param pro: new module dict (project_module_id / gnmc / gnms)
    :param dup_module_id: idc_project_module_check row id the details attach to
    :param score: pre-computed similarity, reused for the description field
    """
    total_similarity1 = 0  # name similarity (LLM score scaled by /100)
    total_similarity2 = 0  # description similarity (taken from *score*)
    for j in ["gnmc", "gnms"]:
        # Loop over the two compared fields: module name, then description.
        content_x = gc.get(j)
        content_y = pro.get(j)
        if content_x and content_y:
            if j == "gnmc":
                # print("功能名称对比")
                similarity, check_desc = glm_utils.AutoDLResult(f"""请帮我分析以下两段重复语句重复的地方: \n第一段话是:'{content_y}', \n ----------------- \n 第二段话是:'{content_x}'""")
                # The LLM may fail to produce a numeric score; treat that as 0.
                if similarity is None:
                    similarity = 0
                print(f"similarity is {similarity}")
                total_similarity1 += similarity/100
                # NOTE(review): check_desc is assumed to be a string here —
                # escape_string(None) would raise; confirm AutoDLResult's contract.
                mysql.sql_change_msg(
                    """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time, check_desc) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s", "%s")"""
                    % (dup_module_id, dl[2], escape_string(content_y), escape_string(content_x), similarity,
                       "功能名称",
                       str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7], escape_string(check_desc)))
            else:
                check_desc = glm_utils.AutoDLResultNoNum(f"""请帮我分析以下两段重复语句重复的地方: \n第一段话是:'{content_y}', \n ----------------- \n 第二段话是:'{content_x}'""")
                similarity = score
                # Description similarity comes from the caller (gnms), not from
                # the LLM call above — the LLM only supplies the explanation text.
                total_similarity2 += similarity
                module_content = pro.get("gnms")
                dup_module_content = gc.get("gnms")
                mysql.sql_change_msg(
                    """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time, check_desc) value (%d, "%s", "%s", "%s", %f, "%s", "%s", "%s", "%s")"""
                    % (dup_module_id, dl[2], escape_string(module_content), escape_string(dup_module_content),
                       similarity,
                       "功能模块描述",
                       str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7],
                       escape_string(check_desc)))
    # Overall module similarity = name part + description part.
    mysql.sql_change_msg("""update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
        total_similarity1 + total_similarity2, dup_module_id))
def project_check(data_list):
    """Run the full duplicate check for every project in *data_list*.

    Each entry is a (project_id, excel_path, project_name) tuple.  The excel
    is parsed into one text blob per dimension, saved into user_data,
    compared against historical projects (check_project_info), and finally
    the per-module comparison (gong_neng_mo_kuai) is started.
    """
    mysql = mysql_pool.ConnMysql()
    # mysql.sql_select_many("""select * from mkgjc""")
    # Load the historical corpora sizes (written to idc_project at the end).
    xmnr_count = len(mysql.sql_select_many("""select * from user_history_data"""))
    gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data"""))
    # NOTE(review): nlp triggers a model load and is only forwarded to
    # check_project_info — confirm it is still needed there.
    nlp = model_scope.Bert_nlp("corom")
    # Iterate over the excel paths to be checked.
    for dl in data_list:
        # path = "0825-丽水系统查重维度1.xlsx"
        # Read the excel under the given path.
        print(dl,dl[1])
        df = pd.read_excel(dl[1])
        data = df.values
        # Concatenate all dimension contents found in the excel file.
        join_str = ""
        str_dict = {}
        gnmk_str = []
        title = ""
        er_title = set()
        for d in data:
            # if pd.notnull(d[0]):
            #     title = d[0]
            #     if title == "功能模块":
            #         er_title.add(d[1])
            #     join_str = ""
            #     for i in d[1:]:
            #         if pd.notnull(i):
            #             join_str += str(i)
            #     str_dict[wdys1.get(title)] = join_str
            if pd.notnull(d[0]):
                # Non-empty first column starts a new first-level dimension.
                title = d[0]
                if title == "功能模块":
                    er_title.add(d[1])
                join_str = ""
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                        if title == "功能模块":
                            if i == '功能描述':
                                continue
                            else:
                                gnmk_str.append(i)
                str_dict[wdys1.get(title)] = join_str
            else:
                # Continuation row: append to the current dimension's text.
                # NOTE(review): join_str is NOT reset here, so the previous
                # row's text is appended again — looks suspicious; confirm.
                if title == "功能模块":
                    er_title.add(d[1])
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str += str(i)
                str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
        # print(str_dict)
        gnmk = ",".join(gnmk_str)
        str_dict['gnmk'] = gnmk
        # Persist the parsed dimension texts of the new project.
        mysql.sql_change_msg(
            """insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % (dl[0], str_dict.get("xzwt") if str_dict.get("xzwt") else None,
               str_dict.get("xtjc") if str_dict.get("xtjc") else None,
               str_dict.get("xmmb") if str_dict.get("xmmb") else None,
               str_dict.get("yqjx") if str_dict.get("yqjx") else None,
               str_dict.get("jsxq") if str_dict.get("jsxq") else None,
               str_dict.get("sjxq") if str_dict.get("sjxq") else None,
               str_dict.get("aqxq") if str_dict.get("aqxq") else None,
               str_dict.get("ywly") if str_dict.get("ywly") else None,
               str_dict.get("hxyw") if str_dict.get("hxyw") else None,
               str_dict.get("ywxq") if str_dict.get("ywxq") else None,
               str_dict.get("ywxt") if str_dict.get("ywxt") else None,
               str_dict.get("jscj") if str_dict.get("jscj") else None,
               str_dict.get("yhfw") if str_dict.get("yhfw") else None,
               str_dict.get("mbqt") if str_dict.get("mbqt") else None,
               str_dict.get("jsnr") if str_dict.get("jsnr") else None,
               str_dict.get("gnmk") if str_dict.get("gnmk") else None,
               str_dict.get("sjgx") if str_dict.get("sjgx") else None,
               str_dict.get("znys") if str_dict.get("znys") else None))
        # Fetch the historical projects to compare against
        # (currently pinned to one fixed project name — looks like test setup).
        xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data where xmmc = '富阳未来社区(乡村)一体化数智平台' """)
        # Compare the new project against each historical row.
        if xmnr_copy1:
            # threads = [Thread(target=check_project_info, args=(mysql, dl, xc, str_dict)) for xc in xmnr_copy1]
            # for t in threads:
            #     t.start()
            #
            # for t in threads:
            #     t.join()
            # pro_ths = []
            # for xc in xmnr_copy1:
            #     # check_project_info(mysql, dl, xc, str_dict)
            #     p = Thread(target=check_project_info, args=(mysql, dl, xc, str_dict))
            #     pro_ths.append(p)
            #     p.start()
            # for p in pro_ths:
            #     p.join()
            xmnr_copy1_new = []
            for xc in xmnr_copy1:
                # Never compare a project against itself.
                if xc["xmmc"] == str_dict.get("xmmc"):
                    continue
                check_project_info(mysql, dl, xc, str_dict, nlp)
            # Find the most similar project and analyse it via the LLM.
        mysql.sql_change_msg(
            """update idc_project set dup_status=3, one_vote_veto_status=1, self_check_status=1, history_project_count=%d ,module_count=%d where project_id=%d""" % (
                xmnr_count, gnmk_count, dl[0]))
        gong_neng_mo_kuai(mysql, dl, data, er_title, str_dict)
def _is_blank_history(value):
    # Mirrors the original emptiness test applied to history-project fields:
    # the literal string 'None', a real None, or an all-whitespace string.
    return value == 'None' or value is None or str.strip(value) == ''


def _is_blank_new(value):
    # Mirrors the original emptiness test applied to the new project's fields
    # (the literal string 'None' is deliberately NOT treated as blank here,
    # matching the original condition exactly).
    return value is None or str.strip(value) == ''


def _is_present_pair(hist_value, new_value):
    # True when both the history value and the new value carry usable text.
    return (hist_value is not None and hist_value != 'None'
            and new_value is not None and len(str.strip(new_value)) > 0)


def check_project_info(mysql, dl, xc, str_dict, nlp):
    """Compare one history project row *xc* against the new project *str_dict*.

    For every dimension both projects filled in, glm_utils.qwenResult rates
    the similarity; similarity_result_check maps the verdict to a score which
    is weighted per dimension and summed.  Per-dimension details go to
    idc_project_check_detail and the total to idc_project_check.

    :param mysql: DB helper exposing sql_change_msg() and cur.lastrowid
    :param dl: tuple (project_id, file_path, project_name) of the new project
    :param xc: dict - one row from user_history_data (first key is skipped)
    :param str_dict: dict mapping dimension code -> new project's text
    :param nlp: unused here; kept so existing callers keep working
    """
    total_similarity = 0
    dup_count = 0
    # Create the per-history-project summary record and remember its id.
    mysql.sql_change_msg(
        """insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
        % (dl[0], escape_string(xc.get("xmmc")), escape_string(dl[1]), "", "", "需求相似、业务相似", "历史项目", "",
           str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    dup_id = mysql.cur.lastrowid
    # Count the "other" dimensions (everything except gnmk/jsnr) that both
    # projects filled in; their weight budget is split evenly among them.
    for x in list(xc.keys())[1:]:
        if xc.get(x) and str_dict.get(x) and x not in ('gnmk', 'jsnr'):
            dup_count += 1
    # Pick the weighting scheme.  These three cases reproduce the original
    # triplicated branches:
    #   gnmk blank on both sides, jsnr usable -> gnmk 0, jsnr 40, others 60
    #   jsnr blank on both sides, gnmk usable -> gnmk 50, jsnr 0, others 50
    #   default                               -> gnmk 50, jsnr 40, others 10
    if (_is_blank_history(xc['gnmk']) and _is_blank_new(str_dict['gnmk'])
            and _is_present_pair(xc['jsnr'], str_dict['jsnr'])):
        gnmk_weight, jsnr_weight, other_budget = 0, 40, 60
    elif (_is_blank_history(xc['jsnr']) and _is_blank_new(str_dict['jsnr'])
            and _is_present_pair(xc['gnmk'], str_dict['gnmk'])):
        gnmk_weight, jsnr_weight, other_budget = 50, 0, 50
    else:
        gnmk_weight, jsnr_weight, other_budget = 50, 40, 10
    for x in list(xc.keys())[1:]:
        dup_function_content = xc.get(x)    # history project's text
        function_content = str_dict.get(x)  # new project's text
        if not (dup_function_content and function_content):
            continue
        if x == 'gnmk':
            weight = gnmk_weight
        elif x == 'jsnr':
            weight = jsnr_weight
        else:
            # dup_count >= 1 whenever this branch runs, because this very
            # dimension was counted in the loop above.
            weight = other_budget / dup_count
        # Ask the LLM for a similarity description and map it to a score.
        desc = glm_utils.qwenResult(function_content, dup_function_content)
        similarity_result, count = similarity_result_check(desc)
        similarity = count * weight
        total_similarity += similarity
        # Persist the per-dimension result.
        mysql.sql_change_msg(
            """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time, similarity_result) value (%d, "%s", %f, "%s", "%s", "%s", "%s", "%s")"""
            % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
               escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
               str(datetime.datetime.now())[:-7], similarity_result))
    mysql.sql_change_msg(
        """update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id))
if __name__ == "__main__":
    # Ask the local service for the projects awaiting a duplicate check.
    response = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 599).json()
    data_list = [
        (ap.get("project_id"), ap.get("file_path"), ap.get("project_name"))
        for ap in response.get("data")
    ]
    print(data_list)
    # Debug override: run against one fixed local workbook instead.
    data_list = [(11, r"D:\ningda\dup_check2\dup_check\0825-丽水系统查重维度1.xlsx", "水路运输综合监管系统建设项目.xls")]
    project_check(data_list)
# Map the LLM similarity verdict to a label and a numeric score
# Verdict character -> (human-readable label, numeric score).
_VERDICT_SCORES = {
    "高": ("非常相似", 90),
    "中": ("比较相似", 60),
    "低": ("相似度低", 30),
}


def similarity_result_check(desc):
    """Translate the similarity description *desc* into (label, score).

    The verdict character (高/中/低) is expected at index 6 of the
    description; any other character, or a description of 7 characters or
    fewer, yields ("", 0).
    """
    if len(desc) > 7:
        return _VERDICT_SCORES.get(desc[6:7], ("", 0))
    return "", 0
@@ -0,0 +1,391 @@ | |||
# coding=utf-8 | |||
import sys | |||
import re | |||
import mysql_pool | |||
from pymysql.converters import escape_string | |||
import cosin_similarity | |||
import pandas as pd | |||
import datetime | |||
import requests | |||
import os | |||
# Chinese dimension name -> internal column code; used when parsing the
# first column of the input workbook.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys"
}
# Internal column code -> Chinese dimension name (inverse of wdys1); used
# when writing human-readable dimension labels into the check tables.
wdys2 = {
    "xmmc": "项目名称",
    "xzwt": "现状问题",
    "xtjc": "系统基础",
    "xmmb": "项目目标",
    "yqjx": "预期绩效",
    "jsxq": "建设需求",
    "sjxq": "数据需求",
    "aqxq": "安全需求",
    "ywly": "业务领域",
    "hxyw": "核心业务",
    "ywxq": "业务需求",
    "ywxt": "业务协同",
    "jscj": "建设层级",
    "yhfw": "用户范围",
    "mbqt": "目标群体",
    "jsnr": "建设内容",
    "gnmk": "功能模块",
    "sjgx": "数据共享",
    "znys": "智能要素"
}
# Function-module sub-field code -> Chinese label.
gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述"
}
def getFlag():
    """Read dimension weights from the local workbook ``0825.xlsx``.

    Column 1 of the sheet holds strings shaped like
    ``<dimension name>(<weight>%)``; every name recognized via ``wdys1`` is
    mapped to its weight substring.

    :return: dict of {dimension_code: "<weight>%"}
    """
    data_dict = {}
    df = pd.read_excel("0825.xlsx")
    data = df.values
    data = list(pd.Series(data[:, 1]).dropna())
    for d in data:
        try:
            wd = re.search("(.*?)(.*?%)", d).group(1).strip()
            wdc = wdys1.get(wd)
            if wdc:
                qz = re.search(".*?((.*?%))", d).group(1)
                data_dict[wdc] = qz
        except (AttributeError, TypeError):
            # AttributeError: the regex did not match (re.search returned
            # None); TypeError: the cell is not a string.  Skip the row in
            # either case — the original bare ``except`` also swallowed
            # KeyboardInterrupt/SystemExit, which this fixes.
            pass
    return data_dict
def gong_neng_mo_kuai(mysql, dl, data, er_title):
    """Module-level duplicate check for the new project's function modules.

    Concatenates the workbook's per-module texts, stores them in
    idc_project_module, scores each module against every row of
    user_history_module_data with cosine similarity (name weight 1,
    description weight 99), and records each module's own keywords in
    user_module_keywords.

    :param mysql: DB helper exposing sql_change_msg/sql_select_many/cur
    :param dl: tuple (project_id, file_path, project_name) of the new project
    :param data: workbook rows (array); d[1] is the module name, d[3] its text
    :param er_title: set of module names found under the "功能模块" dimension
    """
    # Concatenate all third-level texts belonging to each module name.
    str_dict = {}
    for et in er_title:
        for d in data:
            if d[1] == et:
                if str_dict.get(et):
                    str_dict[et] = str_dict.get(et) + d[3]
                else:
                    str_dict[et] = d[3]
    # print(str_dict)
    # Persist each module, then read the generated ids back.
    for k, v in str_dict.items():
        mysql.sql_change_msg(
            """insert into idc_project_module (project_id, check_duplicate_count, module_name, module_content, create_time, update_time, tag) value(%d, 1, '%s', '%s', '%s', '%s', "模块")""" % (
                int(dl[0]), k, v, str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
    module_id_list = mysql.sql_select_many(
        """select project_module_id, module_name, module_content from idc_project_module where project_id=%d""" % dl[
            0])
    data_list = []
    for mil in module_id_list:
        data_dict = {}
        data_dict["project_module_id"] = mil.get("project_module_id")
        data_dict["gnmc"] = mil.get("module_name")
        data_dict["gnms"] = mil.get("module_content")
        data_list.append(data_dict)
    # print(data_list)
    for i in data_list:
        # NOTE(review): this select is loop-invariant; it could be hoisted
        # above the loop.
        gnmk_copy1 = mysql.sql_select_many("""select * from user_history_module_data""")
        if gnmk_copy1:
            for gc in gnmk_copy1:
                total_similarity1 = 0
                total_keywords1 = []
                total_similarity2 = 0
                total_keywords2 = []
                mysql.sql_change_msg(
                    """insert into idc_project_module_check (project_module_id, module_name, project_name, company_name, create_time, update_time) value(%d, '%s', '%s', '%s', '%s', '%s')"""
                    % (
                        i.get("project_module_id"), gc.get("gnmc"), gc.get("xmmc"), "",
                        str(datetime.datetime.now())[:-7],
                        str(datetime.datetime.now())[:-7]))
                dup_module_id = mysql.cur.lastrowid
                for j in ["gnmc", "gnms"]:
                    # Compare module name ("gnmc", weight 1) and module
                    # description ("gnms", weight 99).
                    content_x = gc.get(j)
                    content_y = i.get(j)
                    if content_x and content_y:
                        if j == "gnmc":
                            similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                            # similarity score and keywords
                            similarity, keyword_x, keywords = similarity.main()
                            similarity = similarity * 1
                            total_keywords1 += keywords
                            print("######################相似度: %.2f%%" % similarity, "关键词: %s" % keywords)
                            # accumulate the score
                            total_similarity1 += similarity
                            mysql.sql_change_msg(
                                """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time) value (%d, '%s', '%s', '%s', %f, '%s', '%s', '%s')"""
                                % (dup_module_id, dl[2], escape_string(content_y), escape_string(content_x), similarity,
                                   "功能名称",
                                   str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
                        else:
                            similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                            # similarity score and keywords
                            similarity, keyword_x, keywords = similarity.main()
                            similarity = similarity * 99
                            total_keywords2 += keywords
                            print("######################相似度: %.2f%%" % similarity, "关键词: %s" % keywords)
                            # accumulate the score
                            total_similarity2 += similarity
                            mysql.sql_change_msg(
                                """insert into idc_project_module_check_detail (dup_module_id, project_name, module_content, dup_module_content, similarity, dimension, create_time, update_time) value (%d, '%s', '%s', '%s', %f, '%s', '%s', '%s')"""
                                % (dup_module_id, dl[2], escape_string(content_y), escape_string(content_x), similarity,
                                   "功能模块描述",
                                   str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
                # Store the combined name + description score for this pair.
                mysql.sql_change_msg("""update idc_project_module_check set similarity=%f where dup_module_id=%d""" % (
                    total_similarity1 + total_similarity2, dup_module_id))
        # Self-compare each field to extract this module's own keywords.
        gnmk_gjc = {}
        for a in ["gnmc", "gnms"]:
            if i.get(a):
                content_x = i.get(a)
                content_y = i.get(a)
                if a == "gnmc":
                    similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                    # similarity score and keywords
                    similarity, keyword_x, keywords = similarity.main()
                    gnmk_gjc[a] = keywords
                else:
                    similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                    # similarity score and keywords
                    similarity, keyword_x, keywords = similarity.main()
                    gnmk_gjc[a] = keywords
        mysql.sql_change_msg("""insert into user_module_keywords (xmmc, gnmc, gnms) value('%s', '%s', '%s')""" % (
            dl[2], str(gnmk_gjc.get("gnmc"))[1:-1] if gnmk_gjc.get("gnmc") else None,
            str(gnmk_gjc.get("gnms"))[1:-1] if gnmk_gjc.get("gnms") else None))
def project_check(data_list):
    """Run the whole-project duplicate check for each workbook in *data_list*.

    Each entry of *data_list* is (project_id, excel_path, project_name).
    The workbook's per-dimension texts are concatenated and saved to
    user_data, compared with cosine similarity against every row of
    user_history_data (gnmk weight 50, jsnr weight 40, other shared
    dimensions split a 10-point budget), the scores and keyword-highlighted
    texts are written to the idc_project_check tables and user_keyword, and
    finally the module-level check (gong_neng_mo_kuai) runs.
    """
    mysql = mysql_pool.ConnMysql()
    # mysql.sql_select_many("""select * from mkgjc""")
    # Load the dimensions and weights.
    # xmnr_count = len(mysql.sql_select_many("""select * from xmnr_copy1"""))
    # gnmk_count = len(mysql.sql_select_many("""select * from gnmk_copy1"""))
    xmnr_count = len(mysql.sql_select_many("""select * from user_history_data"""))
    gnmk_count = len(mysql.sql_select_many("""select * from user_history_module_data"""))
    get_data_dict = getFlag()  # NOTE(review): result is never used below
    # Walk the excel file paths.
    for dl in data_list:
        # path = "0825-丽水系统查重维度1.xlsx"
        # Read this project's workbook.
        df = pd.read_excel(dl[1])
        data = df.values
        # Concatenate every dimension's cell texts; rows with an empty first
        # column continue the previous dimension.
        join_str = ""
        str_dict = {}
        title = ""
        er_title = set()
        for d in data:
            if pd.notnull(d[0]):
                title = d[0]
                if title == "功能模块":
                    er_title.add(d[1])
                join_str = ""
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str +=i
                str_dict[wdys1.get(title)] = join_str
            else:
                if title == "功能模块":
                    er_title.add(d[1])
                for i in d[1:]:
                    if pd.notnull(i):
                        join_str +=i
                str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
        # print(str_dict)
        mysql.sql_change_msg(
            """insert into user_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')"""
            % (dl[0], str_dict.get("xzwt") if str_dict.get("xzwt") else None,
               str_dict.get("xtjc") if str_dict.get("xtjc") else None,
               str_dict.get("xmmb") if str_dict.get("xmmb") else None,
               str_dict.get("yqjx") if str_dict.get("yqjx") else None,
               str_dict.get("jsxq") if str_dict.get("jsxq") else None,
               str_dict.get("sjxq") if str_dict.get("sjxq") else None,
               str_dict.get("aqxq") if str_dict.get("aqxq") else None,
               str_dict.get("ywly") if str_dict.get("ywly") else None,
               str_dict.get("hxyw") if str_dict.get("hxyw") else None,
               str_dict.get("ywxq") if str_dict.get("ywxq") else None,
               str_dict.get("ywxt") if str_dict.get("ywxt") else None,
               str_dict.get("jscj") if str_dict.get("jscj") else None,
               str_dict.get("yhfw") if str_dict.get("yhfw") else None,
               str_dict.get("mbqt") if str_dict.get("mbqt") else None,
               str_dict.get("jsnr") if str_dict.get("jsnr") else None,
               str_dict.get("gnmk") if str_dict.get("gnmk") else None,
               str_dict.get("sjgx") if str_dict.get("sjgx") else None,
               str_dict.get("znys") if str_dict.get("znys") else None))
        # Fetch all history rows (formerly table xmnr_copy1).
        xmnr_copy1 = mysql.sql_select_many("""select * from user_history_data""")
        # Compare the history rows against the new project dimension by dimension.
        if xmnr_copy1:
            for xc in xmnr_copy1:
                total_keywords = {}
                total_similarity = 0
                dup_count = 0
                # Create the summary row in idc_project_check; the detail
                # rows reference its auto-generated id.
                mysql.sql_change_msg(
                    """insert into idc_project_check (project_id, dup_project_name, file_path, company_name, create_year, project_tag, project_range_tag, project_area, create_time, update_time) value ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')"""
                    % (dl[0], xc.get("xmmc"), dl[1], "", "", "需求相似、业务相似", "历史项目", "",
                       str(datetime.datetime.now())[:-7], str(datetime.datetime.now())[:-7]))
                dup_id = mysql.cur.lastrowid
                # First pass: count dimensions (excluding gnmk/jsnr) that
                # both projects filled in; they split a 10-point budget.
                for x in list(xc.keys())[1:]:
                    content_x = xc.get(x)
                    content_y = str_dict.get(x)
                    if content_x and content_y:
                        if x == 'gnmk':
                            continue
                        elif x == 'jsnr':
                            continue
                        else:
                            dup_count += 1
                for x in list(xc.keys())[1:]:
                    content_x = xc.get(x)
                    content_y = str_dict.get(x)
                    if content_x and content_y:
                        if x == 'gnmk':
                            # matched history data
                            # dup_count += 1
                            # score this dimension (weight 50)
                            similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                            # similarity score and keywords
                            similarity, keywords_x, keywords_y = similarity.main()
                            similarity = similarity * 50
                            print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
                            # accumulate
                            total_similarity += similarity
                            # collect keywords
                            total_keywords[x] = keywords_y
                            function_content = content_y
                            dup_function_content = content_x
                            # Wrap every matched keyword in a highlight span.
                            for word_y in keywords_y:
                                word_y = word_y.strip().strip("'").strip('"')
                                function_content = str(function_content.replace("\"", "'")).replace(word_y,
                                                                                                    f'<span class="similarity">{word_y.strip()}</span>')
                            for word_x in keywords_x:
                                word_x = word_x.strip().strip("'").strip('"')
                                dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x,
                                                                                                            f'<span class="similarity">{word_x.strip()}</span>')
                            # Save this dimension's similarity to idc_project_check_detail.
                            mysql.sql_change_msg(
                                """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, '%s', %f, '%s', '%s', '%s', '%s')"""
                                % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                                   escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                                   str(datetime.datetime.now())[:-7]))
                            # content = content.replace(gjcs, f'<span class="similarity">{gjcs.strip()}</span>')
                        elif x == 'jsnr':
                            # score this dimension (weight 40)
                            similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                            # similarity score and keywords
                            similarity, keywords_x, keywords_y = similarity.main()
                            similarity = similarity * 40
                            print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
                            # accumulate
                            total_similarity += similarity
                            # collect keywords
                            total_keywords[x] = keywords_y
                            function_content = content_y
                            dup_function_content = content_x
                            for word_y in keywords_y:
                                word_y = word_y.strip().strip("'").strip('"')
                                function_content = str(function_content.replace("\"", "'")).replace(word_y,
                                                                                                    f'<span class="similarity">{word_y.strip()}</span>')
                            for word_x in keywords_x:
                                word_x = word_x.strip().strip("'").strip('"')
                                dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x,
                                                                                                            f'<span class="similarity">{word_x.strip()}</span>')
                            # Save this dimension's similarity to idc_project_check_detail.
                            mysql.sql_change_msg(
                                """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, '%s', %f, '%s', '%s', '%s', '%s')"""
                                % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                                   escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                                   str(datetime.datetime.now())[:-7]))
                        else:
                            # score the remaining dimensions (shared 10-point budget)
                            similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                            # similarity score and keywords
                            similarity, keywords_x, keywords_y = similarity.main()
                            similarity = similarity * (10 / dup_count)
                            print("**************相似度: %.2f%%" % similarity, "关键词: %s" % keywords_y)
                            # accumulate
                            total_similarity += similarity
                            # collect keywords
                            total_keywords[x] = keywords_y
                            function_content = content_y
                            dup_function_content = content_x
                            for word_y in keywords_y:
                                word_y = word_y.strip().strip("'").strip('"')
                                function_content = str(function_content.replace("\"", "'")).replace(word_y,
                                                                                                    f'<span class="similarity">{word_y.strip()}</span>')
                            for word_x in keywords_x:
                                word_x = word_x.strip().strip("'").strip('"')
                                dup_function_content = str(dup_function_content.replace("\"", "'")).replace(word_x,
                                                                                                            f'<span class="similarity">{word_x.strip()}</span>')
                            # Save this dimension's similarity to idc_project_check_detail.
                            mysql.sql_change_msg(
                                """insert into idc_project_check_detail (dup_id, dimension, similarity, function_content, dup_function_content, create_time, update_time) value (%d, '%s', %f, '%s', '%s', '%s', '%s')"""
                                % (dup_id, wdys2.get(x), similarity, escape_string(function_content),
                                   escape_string(dup_function_content), str(datetime.datetime.now())[:-7],
                                   str(datetime.datetime.now())[:-7]))
                mysql.sql_change_msg(
                    """update idc_project_check set similarity=%f where dup_id=%d""" % (total_similarity, dup_id))
        # Self-compare each dimension to extract the new project's keywords.
        project_gjc = {}
        for w in wdys2.keys():
            content_x = str_dict.get(w)
            content_y = str_dict.get(w)
            if content_x and content_y:
                # one run per dimension
                similarity = cosin_similarity.CosineSimilarity(content_x, content_y)
                # similarity score and keywords
                similarity, keywords_x, keywords = similarity.main()
                project_gjc[w] = keywords
        mysql.sql_change_msg(
            """insert into user_keyword (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys) value ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')"""
            % (dl[0], str(project_gjc.get("xzwt"))[1:-1] if project_gjc.get("xzwt") else None,
               str(project_gjc.get("xtjc"))[1:-1] if project_gjc.get("xtjc") else None,
               str(project_gjc.get("xmmb"))[1:-1] if project_gjc.get("xmmb") else None,
               str(project_gjc.get("yqjx"))[1:-1] if project_gjc.get("yqjx") else None,
               str(project_gjc.get("jsxq"))[1:-1] if project_gjc.get("jsxq") else None,
               str(project_gjc.get("sjxq"))[1:-1] if project_gjc.get("sjxq") else None,
               str(project_gjc.get("aqxq"))[1:-1] if project_gjc.get("aqxq") else None,
               str(project_gjc.get("ywly"))[1:-1] if project_gjc.get("ywly") else None,
               str(project_gjc.get("hxyw"))[1:-1] if project_gjc.get("hxyw") else None,
               str(project_gjc.get("ywxq"))[1:-1] if project_gjc.get("ywxq") else None,
               str(project_gjc.get("ywxt"))[1:-1] if project_gjc.get("ywxt") else None,
               str(project_gjc.get("jscj"))[1:-1] if project_gjc.get("jscj") else None,
               str(project_gjc.get("yhfw"))[1:-1] if project_gjc.get("yhfw") else None,
               str(project_gjc.get("mbqt"))[1:-1] if project_gjc.get("mbqt") else None,
               str(project_gjc.get("jsnr"))[1:-1] if project_gjc.get("jsnr") else None,
               str(project_gjc.get("gnmk"))[1:-1] if project_gjc.get("gnmk") else None,
               str(project_gjc.get("sjgx"))[1:-1] if project_gjc.get("sjgx") else None,
               str(project_gjc.get("znys"))[1:-1] if project_gjc.get("znys") else None))
        # Mark the project as checked and record the history/module counts.
        mysql.sql_change_msg(
            """update idc_project set dup_status=3, one_vote_veto_status=1, self_check_status=1, history_project_count=%d ,module_count=%d where project_id=%d""" % (
                xmnr_count, gnmk_count, dl[0]))
        # Run the module-level duplicate check.
        gong_neng_mo_kuai(mysql, dl, data, er_title)
if __name__ == "__main__":
    # Fetch the projects pending a duplicate check from the local service.
    payload = requests.get("http://127.0.0.1:19099/check/duplicates/%s" % 320).json()
    print(payload)
    data_list = [
        (item.get("project_id"), item.get("file_path"), item.get("project_name"))
        for item in payload.get("data")
    ]
    print(data_list)
    # Debug override: check one fixed workbook instead.
    data_list = [(11, r"D:\ningda\dup_check2\dup_check\0825-丽水系统查重维度1.xlsx", "0216-2")]
    project_check(data_list)
@@ -0,0 +1,65 @@ | |||
import os
# NOTE(review): presumably set to tolerate duplicate OpenMP runtimes bundled
# by the ML libraries imported below (libiomp abort workaround) — confirm.
# Must be set before those imports.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
# Replace line-break characters
def replace_newlines(text, new_line=''):
    """Return *text* with every CRLF/CR/LF line break replaced by *new_line*."""
    # CRLF first so it is treated as one break, then the single-character forms.
    for separator in ('\r\n', '\r', '\n'):
        text = text.replace(separator, new_line)
    return text
# Compute text similarity with BERT-family models (via ModelScope)
class Bert_nlp(object):
    """Text-similarity helper wrapping a ModelScope pipeline.

    *nlp_type* selects the backend:
      - "structbert": sentence-similarity classification pipeline
      - "corom": sentence-embedding pipeline returning per-candidate scores
    """

    def __init__(self, nlp_type):
        # Build the ModelScope pipeline matching the requested backend.
        self.nlp_type = nlp_type
        if nlp_type == "structbert":
            model_id = "damo/nlp_structbert_sentence-similarity_chinese-large"
            self.semantic_cls = pipeline(Tasks.sentence_similarity, model_id)
        elif nlp_type == "corom":
            # , sequence_length=1024 /Users/kebobo/.cache/modelscope/hub/damo/nlp_corom_sentence-embedding_chinese-tiny
            model_id = "damo/nlp_corom_sentence-embedding_chinese-tiny"
            self.semantic_cls = pipeline(Tasks.sentence_embedding, model=model_id)

    def main(self, content1, contents):
        """Score *content1* against the candidate texts in *contents*.

        :param content1: query text
        :param contents: list of candidate texts
        :return: (score, content1, best_matching_candidate, best_index)
        """
        # if content1 is None or content1 == "None":
        #     return 0, "", "", -1
        score = 0.0
        # Bug fix: idx was unbound on the "structbert" path (and for unknown
        # nlp_type), so the final `contents[idx]` raised NameError.
        idx = 0
        if len(contents) == 0:
            return score, content1, "", -1
        if self.nlp_type == "structbert":
            # structbert only compares against the first candidate (idx 0).
            result = self.semantic_cls(input=(content1, contents[0]))
            print(result)
            labels = result["labels"]
            acq = labels.index("1")
            score = result["scores"][acq]
        elif self.nlp_type == "corom":
            inputs = {
                "source_sentence": [
                    replace_newlines(content1)
                ],
                "sentences_to_compare": contents
            }
            result = self.semantic_cls(input=inputs)
            print(result)
            arr = result["scores"]
            score = max(arr)
            idx = arr.index(score)
        return score, content1, contents[idx], idx
if __name__ == "__main__":
    # Small manual smoke test for the embedding backend.
    query = """主要功能为快速进行学生课堂评价及小组评价"""
    candidates = [
        """用户通过建设单位账户进入建设单位门户,建设单位门户主要展示本单位项目信息、通知公告与政策文件栏、待办事项栏、本单位进行中项目栏模块。""",
        """主要功能为快速进行学生课堂评价及小组评价""",
    ]
    nlp = Bert_nlp("corom")
    print(nlp.main(query, candidates))
@@ -0,0 +1,113 @@ | |||
# coding=utf-8 | |||
import pymysql | |||
from dbutils.pooled_db import PooledDB | |||
# from dbutils.persistent_db import PersistentDB | |||
# MySQL connection settings for the `idc` database.
# NOTE(review): real credentials are hard-coded in source control here —
# consider loading them from environment variables or a config file.
mysqlInfo = {
    "host": '47.98.125.47',
    "user": 'root',
    "passwd": 'NingdaKeji123!',
    "db": 'idc',
    "port": 3306,
    "charset": "utf8"
}
class ConnMysql(object):
    """Thin MySQL helper on top of a process-wide PooledDB connection pool."""

    # Shared pool, created lazily on first use.
    __pool = None

    def __init__(self):
        # Borrow a pooled connection and open a dict-row cursor on it.
        self.coon = ConnMysql._get_mysql_conn()
        self.cur = self.coon.cursor(cursor=pymysql.cursors.DictCursor)

    @staticmethod
    def _get_mysql_conn():
        """Build the pool on first call, then hand out pooled connections.

        BUG FIX: the old code declared ``global __pool`` and assigned a
        module-level name instead of ``ConnMysql.__pool``, so the class
        attribute stayed None and a brand-new pool was created for every
        instance, defeating pooling entirely.
        """
        if ConnMysql.__pool is None:
            ConnMysql.__pool = PooledDB(
                creator=pymysql,
                mincached=1,
                maxcached=5,
                maxconnections=6,
                maxshared=3,
                blocking=True,   # wait for a free connection instead of raising
                maxusage=None,
                setsession=[],
                ping=2,          # ping the server when a cursor is created
                host=mysqlInfo['host'],
                user=mysqlInfo['user'],
                passwd=mysqlInfo['passwd'],
                db=mysqlInfo['db'],
                port=mysqlInfo['port'],
                charset=mysqlInfo['charset'])
        return ConnMysql.__pool.connection()

    def sql_change_msg(self, sql):
        """Execute an INSERT/UPDATE/DELETE, commit, return the row count."""
        change_sql = self.cur.execute(sql)
        self.coon.commit()
        return change_sql

    def sql_select_one(self, sql):
        """Execute a SELECT and return the first row (dict) or None."""
        self.cur.execute(sql)
        return self.cur.fetchone()

    def sql_select_many(self, sql, count=None):
        """Execute a SELECT; return all rows, or at most ``count`` rows."""
        self.cur.execute(sql)
        if count is None:
            return self.cur.fetchall()
        return self.cur.fetchmany(count)

    def release(self):
        """Close the cursor, then return the connection to the pool.

        BUG FIX: the old order closed the connection before the cursor.
        """
        self.cur.close()
        self.coon.close()
if __name__ == '__main__':
    # Sample of what `show tables;` returns on the `idc` database:
    #   gjc, gjc2, idc_dept, idc_project, idc_project_check,
    #   idc_project_check_detail, idc_project_module,
    #   idc_project_module_check, idc_project_module_check_detail,
    #   idc_user, idc_user_dept, mk2
    # (The old code left that sample as a bare, side-effect-free list
    # expression -- dead code -- which has been folded into this comment.)
    # print(ConnMysql().sql_select_many("show tables;"))
    mysql = ConnMysql()
    # Ad-hoc check: row count of the user history table.
    print(len(mysql.sql_select_many("""select * from user_history_data""")))
    # Duplicate-detection query kept for reference (was a no-op string
    # expression in the old code):
    #   select * from user_history_module_data where gnms in
    #     (select gnms from user_history_module_data
    #      group by gnms having count(gnms)>1);
@@ -0,0 +1 @@ | |||
# Start the Flask server detached from the terminal; append stdout and
# stderr to nohup.info so the process survives logout.
nohup python3 flask_server.py >> nohup.info 2>&1 &
@@ -0,0 +1,5 @@ | |||
* Serving Flask app 'flask_server' (lazy loading) | |||
* Environment: production | |||
WARNING: This is a development server. Do not use it in a production deployment. | |||
Use a production WSGI server instead. | |||
* Debug mode: off |
@@ -0,0 +1,63 @@ | |||
import mysql_pool | |||
import heapq | |||
import uuid | |||
import os | |||
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' | |||
from modelscope.pipelines import pipeline | |||
from modelscope.utils.constant import Tasks | |||
def check_demo():
    """Cross-check every project intro against all other intros and store
    the top-3 most similar candidates per project in ``test_pro_check``.

    NOTE(review): the INSERT below is built with %-string formatting;
    any quote character in a field breaks the statement and opens a SQL
    injection hole. Switch to parameterized execution if the pool helper
    ever grows an args-passing API.
    """
    batch_no = str(uuid.uuid4())
    mysql = mysql_pool.ConnMysql()
    data = mysql.sql_select_many(
        """select * from test_pro_info_new where super_unit LIKE '%农%'""")
    # Load the embedding model once, outside the O(n^2) comparison loop.
    model_id = "damo/nlp_corom_sentence-embedding_chinese-tiny"
    semantic_cls = pipeline(Tasks.sentence_embedding, model=model_id)
    for pro in data:
        # Every OTHER project's intro (newlines stripped) is a candidate.
        pro_info_list = [
            str(ap.get("base_proj_intro")).replace('\n', '')
            for ap in data
            if ap.get("base_proj_intro") != pro.get("base_proj_intro")
        ]
        inputs = {
            "source_sentence": [
                pro.get("base_proj_intro")
            ],
            "sentences_to_compare": pro_info_list
        }
        result = semantic_cls(input=inputs)
        print(result)
        arr = result["scores"]
        # BUG FIX: the old code took heapq.nlargest(3, arr) and mapped each
        # score back with arr.index(score), which returns the FIRST
        # occurrence -- duplicate scores all resolved to the same candidate.
        # Keeping (index, score) pairs preserves the true positions.
        top_3 = heapq.nlargest(3, enumerate(arr), key=lambda pair: pair[1])
        for idx, ele in top_3:
            for ele1 in data:
                if ele1.get("base_proj_intro") == pro_info_list[idx]:
                    mysql.sql_change_msg(
                        """insert into test_pro_check (pro_name, pro_info, check_pro_name, check_pro_info, batch_no, score, pro_area, check_pro_area, pro_set_year, check_pro_set_year, create_time) value("%s" ,"%s", "%s", "%s", "%s", "%f", "%s", "%s", "%s", "%s", now())""" % (
                            pro.get("base_proj_name"), pro.get("base_proj_intro"), ele1.get("base_proj_name"), pro_info_list[idx], batch_no, ele, pro.get("base_area_name"), ele1.get("base_area_name"), pro.get("base_proj_set_year"), ele1.get("base_proj_set_year")))
                    break
if __name__ == "__main__":
    # Entry point: run the full pairwise similarity check against the DB.
    check_demo()
@@ -0,0 +1,43 @@ | |||
certifi==2022.6.15 | |||
cffi==1.15.1 | |||
chardet==5.0.0 | |||
charset-normalizer==2.0.12 | |||
click==8.0.4 | |||
colorama==0.4.5 | |||
cryptography==3.4.7 | |||
dataclasses==0.8 | |||
DBUtils==3.0.2 | |||
et-xmlfile==1.1.0 | |||
Flask==1.0.2 | |||
idna==3.3 | |||
importlib-metadata==4.8.3 | |||
itsdangerous==2.0.1 | |||
jieba==0.42.1 | |||
Jinja2==3.0.3 | |||
joblib==1.1.0 | |||
MarkupSafe==2.0.1 | |||
numpy==1.19.5 | |||
openpyxl==3.0.10 | |||
pandas==1.1.5 | |||
pdfminer.six==20211012 | |||
pdfplumber==0.6.0 | |||
Pillow==8.4.0 | |||
pycparser==2.21 | |||
PyMySQL==0.10.1 | |||
pypiwin32==223 | |||
python-dateutil==2.8.2 | |||
pytz==2022.2.1 | |||
pywin32==304 | |||
requests~=2.31.0 | |||
scikit-learn~=1.1.3 | |||
scipy==1.5.4 | |||
six==1.16.0 | |||
threadpoolctl==3.1.0 | |||
typing_extensions==4.1.1 | |||
urllib3==1.26.12 | |||
Wand==0.6.10 | |||
Werkzeug==2.0.3 | |||
zipp==3.6.0 | |||
modelscope~=1.9.2 | |||
# NOTE: 'sklearn~=0.0.post5' removed -- it is the deprecated dummy meta-package; scikit-learn is already pinned above.
@@ -0,0 +1,179 @@ | |||
人民群众 居民 公众 | |||
数据仓 数据仓库 数据高铁 数据集市 数据资源库 | |||
浙江省 省 全省 我省 | |||
政务云 专有云 信创云 电子政务网络 政务云平台 信创云资源 政法云 浙江省政府数据中心政务云平台 | |||
省统建电子健康档案系统 全省电子健康档案数据管理 | |||
日浏览量 pv | |||
日均访问量 uv | |||
数据交换 数据共享 | |||
互联网+健康医疗 健康大脑+ | |||
安全三级测评 等保三级 | |||
安全等级保护 等保 网络安全等级保护 信息安全等级化保护 信息安全保护等级 安全保障体系 信息系统定级 等保测评 国家信息安全等级 | |||
AED 除颤器 除颤仪 | |||
电脑端 pc端 Web端 | |||
HIS 医院信息系统 | |||
监管驾驶舱 监管系统 | |||
GIS 地理信息地图 GIS地理信息技术 | |||
维护 技术支撑 维护人员 网络管理员 系统管理员 软件维护人员 系统支撑 运维团队 平台管理人员 | |||
一体化智能化数据共享平台 IRS系统 政务大数据统一支撑平台 公共数据交换平台 公共数据中心平台 电子政务基础设施一体化管控平台 一体化数字资源系统 | |||
软件 应用 系统 应用软件 | |||
接口 api 数据接口 RestfulAPI | |||
前后端 开发人员 开发 技术人员 IT技术人员 | |||
异步接口 AJAX JSONP | |||
巡检 巡查 | |||
驾驶舱 数字化驾驶舱 数据大屏 数据可视化平台 | |||
信创 信息技术应用创新产业 | |||
防御性验证技术 用户ID/密码 口令 人脸识别 | |||
数据备份工具 pg_dump | |||
日志管理 日志记录 | |||
数字化改革 信息化 数字化 浙江省数字政府改革 政府数字化 | |||
易扩充 可扩展性 开放性 | |||
三单制 动态督考应用 “三单制”履职服务平台 | |||
Java消息服务 Java JMS | |||
虚拟专用网络 VPN 专用网络技术 | |||
短信认证 短信猫 短信网关 | |||
云资源 云服务资源 | |||
缓存中间件 redis 缓存数据库
最小磁盘空间需求 MDSR | |||
身份认证 身份鉴别 | |||
剩余信息保护 磁盘空闲数据擦除 数据完全擦除 | |||
互联互通 共享性 开放性 互联共享 | |||
数据加工 数据加工清洗平台 | |||
面向服务架构 SOA | |||
集成融合技术 SOI | |||
消息协议 http jms | |||
直接连接存储 DAS | |||
网络连接存储设备 NAS | |||
磁盘阵列技术 RAID | |||
双机容错技术 双机热备份技术 对子双机容错 | |||
安全防护软件 安全软件 | |||
JAAS Java认证与授权服务 | |||
WebService web应用程序分支 | |||
食品小作坊数字化监管平台 红盾智坊 | |||
政务服务网用户体系 浙里办 | |||
浙政钉用户体系 浙政钉 | |||
AU 总用户量 | |||
DC 每用户每天产生数据增量 | |||
YD 每年有效工作日 | |||
C 为存储冗余量比例 | |||
F 为系统未来3~5年的业务量发展冗余预留,发展系数设以1起算 | |||
IP地址伪装技术 NAT | |||
VR 全景VR | |||
离线引擎 Hive odps | |||
在线引擎 sck | |||
时序数据库 Druid、HBase | |||
关系型数据库 PostgreSQL | |||
浙里护苗 未成年人全生命周期专题库 | |||
逻辑块地址 LBA | |||
数据块 Block | |||
云防火墙 防火墙的硬件虚拟化技术 | |||
云杀毒 云环境下的杀毒策略 | |||
国产操作系统 国产信创麒麟操作系统 统信UOS 中标Linux 中标麒麟 麒麟桌面操作系统 | |||
项目问题 PPR ProjectProblemReport | |||
项目干系人 Stakeholder | |||
变更请求 CRR ChangeRequestReport 需求变更 | |||
软件产品需求基准 Baseline | |||
新需求 NewRequirement | |||
需求取消 CanceledRequirement | |||
CCB ChangeControlBoard | |||
软件问题 SPR SoftwareProblemReport | |||
问题 Issue | |||
软件问题 SPR | |||
Q&A QuestionAndAnswer | |||
质量保证员 QA | |||
揭榜挂帅 组团赛马 | |||
B/S Browser/Server 浏览器/服务器 | |||
数据管理层 Server | |||
用户界面层 Client | |||
ECMAScript 欧洲计算机协会制定的js规范 | |||
JSON JavaScriptObject Notation JS 对象简谱 | |||
Hadoop 分布式文件系统 HDFS | |||
网络数据传输方式 TCP UDP HTTP 串口 | |||
专有钉钉 专有钉钉开放平台 | |||
应用服务器 ApplicationServer | |||
开放字体格式 WOFF | |||
GNSS 表面位移监 | |||
MEMS 加速度计 | |||
图形工具 ArcGIS | |||
实时数据流处理技术 Storm 连续计算 | |||
CDEM 连续-非连 Continuum Discontinuum Element Method | |||
IVR 手机智能群呼 | |||
B/S 开发工具 Arcgis servers | |||
BSD 风险许可证 | |||
C&C 肉鸡检测 | |||
云计算 cloud computing | |||
增量备份 incrementalbackup | |||
log 日志 | |||
Web应用防火墙 WAF | |||
入侵检测系统 IDS | |||
国产处理器芯片 RK3399 | |||
无故障时间 MTBF | |||
平均修复时间 MTTR | |||
单点登录功能 SSO | |||
入侵检测 IDS | |||
现代浏览器 Chrome Safari FireFox | |||
Service-Oriented-Architecture SOA 面向服务的体系结构 | |||
模型-视图-控制器 MVC MODEL-VIEW-CONTROLLER | |||
简体中文 GB2312 UNICODE UTF-7 UTF-8
BIG5 繁体中文 | |||
声音计量仪器 GPRS噪声变送器 | |||
GPS 定位与导航 | |||
可燃气体探测器 报警器 | |||
business component name 业务组件名称 | |||
EAI 企业应用集成 | |||
OGC标准服务 WMS WMS-C WFS WCS | |||
地图瓦片服务 WMTS | |||
表现层 UI | |||
业务逻辑层 BLL | |||
数据访问层 DAL | |||
三层架构 3-tierapplication | |||
跨站脚本漏洞 XSS | |||
发光二极管 LED | |||
电视信号 VCD | |||
录像机 DVD | |||
影碟机 LD | |||
影像 Video | |||
电插锁 阳极锁 | |||
阴极锁 电锁口 | |||
接入路数 接入带宽 | |||
即插即用 UPnP | |||
SNMP 简单网络管理 | |||
NTP 网络校时 | |||
SADP 自动搜索IP地址 | |||
SMTP 邮件服务 | |||
NFS NAS网盘 | |||
iSCSI IP SAN网盘 | |||
PPPoE 拨号上网 | |||
移动终端应用 APP 移动客户端 | |||
OLT Optical Line Terminal 光纤线路终端 | |||
ONU Optical Network Unit 光纤网络单元 光网络单元 | |||
光配线网 ODN | |||
数字沙盘 数字地理信息系统 | |||
地理信息服务总线 ESB | |||
Geo-ESB Geographic Enterprise Service Bus 地理信息企业服务总线 | |||
JMS Java Message Service | |||
搜索服务器 Elasticsearch群集 | |||
农家乐 民宿 | |||
城市大脑 城市治理 | |||
好龙泉 i龙泉模块 | |||
网络态势感知 Cyberspace Situation Awareness CSA | |||
消息队列 kafka amq rmq | |||
数据集成服务 ETL | |||
数据抽取 Extract | |||
转换 Transform | |||
加载 Load | |||
数据存储 ODS | |||
数据仓库 DW | |||
数值型数据 度量 | |||
角度 维度 | |||
指标 KPI | |||
多维数据集 OLAP 数据立方 | |||
元数据 Metadata | |||
MTBR 平均无故障率 | |||
MTBF 平均无故障时间
软件防护 防病毒软件 | |||
硬件防护 硬件防火墙 | |||
数据库管理员 SA | |||
水政务协同 整体智治 | |||
数字创新,一网智办 政策项目管理与服务平台 | |||
龙财通 数字创新场景迭代升级建设项目 |
@@ -0,0 +1,81 @@ | |||
社会主义 ns | |||
信息化 ns | |||
最多跑一次 ns | |||
零次跑 ns | |||
多跨协同 ns | |||
数字化改革 ns | |||
区块链 ns | |||
大数据 ns | |||
物联网 ns | |||
智能化 ns | |||
数字化 ns | |||
OA ns | |||
子模块 ns | |||
政治面貌 ns | |||
社会背景 ns | |||
职称职务 ns | |||
数字政府 ns | |||
一件事 ns | |||
智慧监管 ns | |||
互联网+ ns | |||
政务云 ns | |||
四横三纵 ns | |||
政务钉钉 ns | |||
数据共享 ns | |||
业务协同 ns | |||
数据协同 ns | |||
钉钉 ns | |||
数据仓 ns | |||
领导驾驶舱 ns | |||
条线 ns | |||
卫健委 ns | |||
政区划码 ns | |||
监管信息 ns | |||
任务调度 ns | |||
IRS ns | |||
JSON ns | |||
API ns | |||
SM3 ns | |||
AED ns | |||
HIS ns | |||
GIS ns | |||
api ns | |||
信创 ns | |||
日志管理 ns | |||
权限管理 ns | |||
角色管理 ns | |||
短信认证 ns | |||
短信猫 ns | |||
短信网关 ns | |||
云资源 ns | |||
MDSR ns | |||
身份认证 ns | |||
SOA ns | |||
DAS ns | |||
SOI ns | |||
http ns | |||
揭榜挂帅 ns | |||
专有钉钉 ns | |||
GNSS ns | |||
MEMS ns | |||
ArcGIS ns | |||
IVR ns | |||
BSD ns | |||
GPS ns | |||
数字沙盘 ns | |||
城市大脑 ns | |||
KPI ns | |||
一体化智能化数据共享平台 ns | |||
三单制 ns | |||
JAAS ns | |||
浙里办 ns | |||
浙政钉 ns | |||
VR ns | |||
浙里护苗 ns | |||
CCB ns | |||
B/S ns | |||
EAI ns | |||
ESB ns | |||
Web ns | |||
一张图 ns | |||
nan ns |