# coding=utf-8
"""Load project-proposal spreadsheets into the duplicate-check history tables.

The module reads Excel sheets with pandas, concatenates the text found under
each dimension title, stores the text and the keywords returned by
``cosin_similarity.CosineSimilarity`` in MySQL, and offers a few one-off
helpers that export pairwise comparison prompts as JSON lines.

NOTE(review): this file was reconstructed from a whitespace-collapsed copy,
so the nesting of the spreadsheet-parsing loop is inferred from syntax;
confirm it against version control.
"""
import json
import os
import re

import pandas as pd
from pymysql.converters import escape_string

import cosin_similarity
import glm_utils
import mysql_pool

# Dimension title as written in column 0 of the spreadsheets -> column name.
wdys1 = {
    "项目名称": "xmmc",
    "现状问题": "xzwt",
    "系统基础": "xtjc",
    "项目目标": "xmmb",
    "预期绩效": "yqjx",
    "建设需求": "jsxq",
    "数据需求": "sjxq",
    "安全需求": "aqxq",
    "业务领域": "ywly",
    "核心业务": "hxyw",
    "业务需求": "ywxq",
    "业务协同": "ywxt",
    "建设层级": "jscj",
    "用户范围": "yhfw",
    "目标群体": "mbqt",
    "建设内容": "jsnr",
    "功能模块": "gnmk",
    "数据共享": "sjgx",
    "智能要素": "znys",
    "申报单位": "sbdw",
    "所属地区": "ssdq",
    "预算年度": "ysnd",
}

# Reverse lookup (column name -> title), derived so the two maps cannot drift.
wdys2 = {code: title for title, code in wdys1.items()}

gnmkys = {
    "gnmc": "功能名称",
    "gnms": "功能描述",
}

# Title of the function-module section and the header cell that is excluded
# from the comma-joined module text on the section's title row.
_MODULE_TITLE = "功能模块"
_MODULE_DESC_HEADER = "功能描述"

# Dimension columns of user_history_data, in insert order.
_HISTORY_COLUMNS = (
    "xzwt", "xtjc", "xmmb", "yqjx", "jsxq", "sjxq", "aqxq", "ywly", "hxyw",
    "ywxq", "ywxt", "jscj", "yhfw", "mbqt", "jsnr", "gnmk", "sjgx", "znys",
    "sbdw", "ssdq", "ysnd",
)

# Dimension columns of user_history_keywords, in insert order.
_KEYWORD_COLUMNS = (
    "xzwt", "xtjc", "xmmb", "yqjx", "jsxq", "sjxq", "aqxq", "ywly", "hxyw",
    "ywxq", "ywxt", "jscj", "yhfw", "mbqt", "jsnr", "gnmk", "sjgx", "znys",
)

# Patterns used by getFlag, kept exactly as found in the file.
_WEIGHT_NAME_RE = re.compile("(.*?)(.*?%)")
_WEIGHT_VALUE_RE = re.compile(".*?((.*?%))")

# Prompt text and segment delimiter shared by the JSON-lines exporters.
_INSTRUCTION = "现在你是一个政府提案的查重检查人员,给定两段话A和B:让我们一步步思考并判断是否相似。请以相似度从高、中、低三个等级进行评价,并给出理由。"
_SEGMENT_DELIMITER = '-----》'

# Project names compared against by info_word_project and info_word1.
_REFERENCE_PROJECTS_IN = (
    "'浙江省第二监狱重点罪犯管控模型项目', '浙江省农村水电站管理数字化应用', "
    "'浙江省河湖库保护数字化应用建设项目', '浙江省环境遥感监测业务智治', '平台项目', "
    "'浙江林业智媒平台项目', '未来e家应用建设方案', '浙江省智慧林业云平台升级改造项目建设方案', "
    "'为侨服务“全球通”平台二期建设项目'"
)


def getFlag():
    """Read dimension weights from the weighting spreadsheet.

    Returns:
        dict mapping a dimension column name (a value of ``wdys1``) to the
        weight text captured from the second column of the sheet.

    NOTE(review): as written, the first pattern's lazy group always matches
    the empty string, so no dimension name is ever found and the result is
    always empty. The patterns presumably used full-width parentheses
    originally; confirm before relying on this function.
    """
    data_dict = {}
    df = pd.read_excel("0825-丽水系统查重维度.xlsx")
    cells = pd.Series(df.values[:, 1]).dropna()
    for cell in cells:
        # Non-text cells and cells without a match were silently skipped by
        # the original bare ``except``; skip them explicitly instead.
        if not isinstance(cell, str):
            continue
        name_match = _WEIGHT_NAME_RE.search(cell)
        if name_match is None:
            continue
        wdc = wdys1.get(name_match.group(1).strip())
        if not wdc:
            continue
        weight_match = _WEIGHT_VALUE_RE.search(cell)
        if weight_match is not None:
            data_dict[wdc] = weight_match.group(1)
    return data_dict


def _insert_row(mysql, table, columns, values):
    """Run a string-built INSERT with every value wrapped in double quotes.

    Values must already be escaped by the caller. ``None`` is rendered as the
    literal text ``None``, which is what the original statements stored.
    """
    column_sql = ", ".join(columns)
    value_sql = ", ".join('"%s"' % (value,) for value in values)
    mysql.sql_change_msg(
        "insert into %s (%s) value (%s)" % (table, column_sql, value_sql))


def _escaped(value):
    """Return ``value`` escaped for a quoted SQL literal, or None if falsy."""
    return escape_string(str(value)) if value else None


def _keyword_text(keywords):
    """Render a keyword collection as its ``str()`` form without the brackets."""
    return escape_string(str(keywords)[1:-1])


def gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title, line):
    """Store the text and keywords of every function module of one project.

    Args:
        xmmc: project name.
        mysql: connection object exposing ``sql_change_msg``.
        dl: the data_list entry being processed (unused, kept for callers).
        data: spreadsheet rows; column 1 holds the module name and column 3
            the description fragment.
        er_title: set of module names collected by ``project_check``.
        line: batch label written to the ``line`` column.

    Rows are grouped in a single pass, so modules are inserted in sheet order.
    Null description cells are skipped; a module with no text is not stored.
    """
    module_text = {}
    for row in data:
        module_name = row[1]
        if pd.isnull(module_name) or module_name not in er_title:
            continue
        fragment = row[3]
        if pd.isnull(fragment):
            continue
        module_text[module_name] = module_text.get(module_name, "") + str(fragment)

    safe_xmmc = escape_string(xmmc)
    safe_line = escape_string(str(line))
    for module_name, text in module_text.items():
        safe_name = escape_string(str(module_name))
        _insert_row(
            mysql, "user_history_module_data",
            ("xmmc", "gnmc", "gnms", "line", "remark"),
            (safe_xmmc, safe_name, escape_string(text), safe_line, ""))
        # The text is compared with itself; only the keyword list is used.
        _, _, keywords = cosin_similarity.CosineSimilarity(text, text).main()
        _insert_row(
            mysql, "user_history_module_keywords",
            ("xmmc", "gnmc", "gnms", "line"),
            (safe_xmmc, safe_name, _keyword_text(keywords), safe_line))


def _project_name(df):
    """Return the second column label with the report-type suffixes removed."""
    xmmc = df.keys()[1]
    # The original guarded these replaces with an ``or`` chain that was always
    # true, so they have always run unconditionally.
    for suffix in ('可研报告', '可研性报告', '可行性研究报告'):
        xmmc = xmmc.replace(suffix, '')
    return xmmc


def _collect_dimensions(data):
    """Concatenate the cell text found under each dimension title.

    A row with a value in column 0 starts a new dimension and contributes
    columns 1 onwards; a row without one continues the current dimension and
    contributes columns 3 onwards.

    Returns:
        (str_dict, er_title): text per dimension column name, and the set of
        column-1 values seen on function-module rows.
    """
    str_dict = {}
    er_title = set()
    module_cells = []
    title = ""
    running_text = ""
    for row in data:
        starts_dimension = pd.notnull(row[0])
        if starts_dimension:
            title = row[0]
            running_text = ""
            cells = row[1:]
        else:
            cells = row[3:]
        in_modules = title == _MODULE_TITLE
        if in_modules:
            er_title.add(row[1])
        for cell in cells:
            if pd.isnull(cell):
                continue
            running_text += str(cell)
            if not in_modules:
                continue
            # Only the title row drops the description header cell.
            if starts_dimension and cell == _MODULE_DESC_HEADER:
                continue
            module_cells.append(str(cell))
        key = wdys1.get(title)
        if starts_dimension:
            str_dict[key] = running_text
        else:
            # NOTE(review): running_text is not reset on continuation rows, so
            # earlier fragments are appended again (A then B gives "AAB").
            # Kept as found because stored history data depends on it; confirm
            # whether this is intended.
            str_dict[key] = str_dict.get(key, "") + running_text
    # The comma-joined cell list replaces the concatenated module text.
    str_dict['gnmk'] = ",".join(module_cells)
    return str_dict, er_title


def project_check(data_list, line):
    """Import every spreadsheet in ``data_list`` into the history tables.

    Args:
        data_list: iterable of entries whose element 1 is an Excel file path.
        line: batch label written to the ``line`` column of every row.
    """
    mysql = mysql_pool.ConnMysql()
    safe_line = escape_string(str(line))
    for dl in data_list:
        print(dl)
        df = pd.read_excel(dl[1])
        xmmc = _project_name(df)
        data = df.values
        str_dict, er_title = _collect_dimensions(data)
        safe_xmmc = escape_string(xmmc)

        history_values = [safe_xmmc]
        history_values.extend(
            _escaped(str_dict.get(column)) for column in _HISTORY_COLUMNS)
        history_values.extend((safe_line, ""))
        _insert_row(
            mysql, "user_history_data",
            ("xmmc",) + _HISTORY_COLUMNS + ("line", "remark"),
            history_values)

        # Each dimension is compared with itself; only the keywords are kept.
        project_gjc = {}
        for column in wdys2:
            content = str_dict.get(column)
            if content:
                _, _, keywords = cosin_similarity.CosineSimilarity(
                    content, content).main()
                project_gjc[column] = keywords

        keyword_values = [safe_xmmc]
        for column in _KEYWORD_COLUMNS:
            keywords = project_gjc.get(column)
            keyword_values.append(_keyword_text(keywords) if keywords else None)
        keyword_values.append(safe_line)
        _insert_row(
            mysql, "user_history_keywords",
            ("xmmc",) + _KEYWORD_COLUMNS + ("line",),
            keyword_values)

        gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title, line)


def _update_glm_desc(table, xmmc):
    """Fill ``glm_desc`` for every row of ``table`` belonging to ``xmmc``.

    Each ``gnms`` text is passed to ``glm_utils.CallContentResultNew`` (per
    the original comment, information extraction via ChatGLM). A falsy result
    is stored as the literal text ``None``, as before.
    """
    mysql = mysql_pool.ConnMysql()
    module_list = mysql.sql_select_many(
        """select id, gnms from %s where xmmc = '%s'""" % (table, xmmc))
    for module in module_list:
        content = glm_utils.CallContentResultNew(module.get("gnms"))
        # The result is free text, so escape it before quoting it in SQL.
        desc = escape_string(str(content)) if content else None
        mysql.sql_change_msg(
            """UPDATE %s SET glm_desc = "%s" WHERE id = %d""" % (
                table, desc, module.get("id")))
        print(content)


def update_desc():
    """Fill ``glm_desc`` in user_history_module_data for one fixed project."""
    _update_glm_desc("user_history_module_data", "丽水市城市管理指挥中心信息系统(一期)项目")


def update_desc1():
    """Fill ``glm_desc`` in gnms_gml for one fixed project."""
    _update_glm_desc("gnms_gml", "丽水花园云(城市大脑)数字驾驶舱项目")


def _comparison_record(text_a, text_b):
    """Build one prompt record asking for a similarity rating of two texts."""
    return {
        "instruction": _INSTRUCTION,
        "input": "A:%s\nB:%s" % (text_a, text_b),
        "output": "",
    }


def _write_json_lines(path, records):
    """Write ``records`` to ``path`` as UTF-8 text, one JSON object per line."""
    # Explicit UTF-8: the records hold Chinese text written with
    # ensure_ascii=False, which a non-UTF-8 default encoding cannot store.
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(
            json.dumps(record, ensure_ascii=False) + '\n' for record in records)


def _export_jsnr_pairs(target_sql, reference_sql, path):
    """Pair every ``jsnr`` segment of the target rows with every reference one."""
    mysql = mysql_pool.ConnMysql()
    target_rows = mysql.sql_select_many(target_sql)
    reference_rows = mysql.sql_select_many(reference_sql)
    records = [
        _comparison_record(target_segment, reference_segment)
        for target_row in target_rows
        for target_segment in target_row["jsnr"].split(_SEGMENT_DELIMITER)
        for reference_row in reference_rows
        for reference_segment in reference_row["jsnr"].split(_SEGMENT_DELIMITER)
    ]
    _write_json_lines(path, records)


def info_word_project():
    """Export ``jsnr`` comparison prompts: one project vs. the reference list."""
    _export_jsnr_pairs(
        """select jsnr from user_history_data where xmmc = '2023年丽水市云和县数字法治门户建设项目' """,
        """select jsnr from user_history_data where xmmc IN (%s)""" % _REFERENCE_PROJECTS_IN,
        '其他-建设内容.json')


def info_word1():
    """Export ``gnms`` comparison prompts: one project vs. the reference list."""
    mysql = mysql_pool.ConnMysql()
    module_list1 = mysql.sql_select_many(
        """select gnms from user_history_module_data where xmmc = '丽水市遂昌县政法委数字法治综合应用' """)
    module_list2 = mysql.sql_select_many(
        """select gnms from user_history_module_data where xmmc IN (%s)""" % _REFERENCE_PROJECTS_IN)
    records = [
        _comparison_record(module_info1["gnms"], module_info2["gnms"])
        for module_info1 in module_list1
        for module_info2 in module_list2
    ]
    _write_json_lines('其他-功能模块对比.json', records)


def info_word_project_yw():
    """Export ``jsnr`` comparison prompts against a second fixed project list.

    Writes to the same file as ``info_word_project``, overwriting it.
    """
    _export_jsnr_pairs(
        """select jsnr from user_history_data where xmmc = '2023年丽水市云和县数字法治门户建设项目' """,
        """select jsnr from user_history_data where xmmc IN ('2023年丽水市云和县数字法治门户建设项目', '浙江省司法厅全域数字法治监督应用系统(一期)', '丽水市遂昌县政法委数字法治综合应用', '丽水市龙泉市政法委法治龙泉门户', '庆元县数字法治综合门户')""",
        '其他-建设内容.json')


if __name__ == "__main__":
    info_word1()
    print("ok.......")
    path = r"/Users/kebobo/Downloads/丽水/未来社区"
    for file_name in os.listdir(path):
        if file_name == '.DS_Store':
            continue
        project_check([(0, path + '/' + file_name, "")], "2024-07-27-数字法治")
        print("已存入************************************* %s" % file_name)