|
- # coding=utf-8
-
- import re
- import mysql_pool
- from pymysql.converters import escape_string
- import cosin_similarity
- import pandas as pd
- import glm_utils
- import os
- import json
-
# Dimension label (as it appears in the Excel sheets) -> short column code.
# The codes are the column names used by the INSERT statements in this file.
wdys1 = dict(
    (
        ("项目名称", "xmmc"),
        ("现状问题", "xzwt"),
        ("系统基础", "xtjc"),
        ("项目目标", "xmmb"),
        ("预期绩效", "yqjx"),
        ("建设需求", "jsxq"),
        ("数据需求", "sjxq"),
        ("安全需求", "aqxq"),
        ("业务领域", "ywly"),
        ("核心业务", "hxyw"),
        ("业务需求", "ywxq"),
        ("业务协同", "ywxt"),
        ("建设层级", "jscj"),
        ("用户范围", "yhfw"),
        ("目标群体", "mbqt"),
        ("建设内容", "jsnr"),
        ("功能模块", "gnmk"),
        ("数据共享", "sjgx"),
        ("智能要素", "znys"),
        ("申报单位", "sbdw"),
        ("所属地区", "ssdq"),
        ("预算年度", "ysnd"),
    )
)

# Reverse lookup (column code -> dimension label), in the same order as wdys1.
wdys2 = {code: label for label, code in wdys1.items()}

# Column code -> label for the two per-module fields (name and description).
gnmkys = dict(gnmc="功能名称", gnms="功能描述")
-
-
def getFlag():
    """Read the dimension/weight sheet and return ``{column_code: weight}``.

    The second column of ``0825-丽水系统查重维度.xlsx`` is scanned for cells of
    the form ``<dimension label>(<weight>%)``. Labels are translated to
    column codes through ``wdys1``; cells whose label is unknown, or that do
    not follow the pattern, are skipped.

    Returns:
        dict: column code -> weight text including the trailing ``%``
        (for example ``"10%"``).
    """
    # The previous pattern used unescaped parentheses, so its first group was
    # always the empty string and no dimension was ever recognised.  Match the
    # literal brackets instead, accepting both ASCII and full-width forms.
    pattern = re.compile(r"^(.*?)[((]\s*([^(())]*?%)\s*[))]")

    df = pd.read_excel("0825-丽水系统查重维度.xlsx")
    cells = pd.Series(df.values[:, 1]).dropna()

    data_dict = {}
    for cell in cells:
        # Numeric cells cannot carry a "label(weight%)" entry.
        if not isinstance(cell, str):
            continue
        match = pattern.search(cell)
        if match is None:
            continue
        wdc = wdys1.get(match.group(1).strip())
        if wdc:
            data_dict[wdc] = match.group(2)
    return data_dict
-
- # getFlag()
-
def gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title, line):
    """Store every functional module of a project together with its keywords.

    Rows of ``data`` whose second cell is one of ``er_title`` are grouped by
    that cell, and their fourth cells are concatenated into one description
    per module. For each module one row is inserted into
    ``user_history_module_data`` and one into ``user_history_module_keywords``.

    Args:
        xmmc: project name.
        mysql: connection wrapper exposing ``sql_change_msg(sql)``.
        dl: the source entry being processed (unused here, kept for callers).
        data: 2-D array of the workbook cells (``DataFrame.values``).
        er_title: set of module names to collect.
        line: batch label written to the ``line`` column.
    """
    # Concatenate the description cells of each module.  Grouping follows the
    # row order of the sheet, so inserts are deterministic (iterating the set
    # made the order depend on string hashing).
    str_dict = {}
    for d in data:
        name, desc = d[1], d[3]
        # Empty cells are NaN floats; they cannot be a module name and cannot
        # be concatenated to the description text.
        if pd.isnull(name) or pd.isnull(desc) or name not in er_title:
            continue
        str_dict[name] = str_dict.get(name, "") + str(desc)

    # Escape once; both statements below embed these inside double quotes.
    safe_xmmc = escape_string(str(xmmc))
    safe_line = escape_string(str(line))

    for k, v in str_dict.items():
        safe_name = escape_string(str(k))
        mysql.sql_change_msg("""insert into user_history_module_data(xmmc,gnmc,gnms,line, remark) value("%s", "%s", "%s", "%s", "%s")""" % (
            safe_xmmc, safe_name, escape_string(v), safe_line, ""))

        # Comparing the text with itself is only used to obtain its keywords.
        _, _, keywords_y = cosin_similarity.CosineSimilarity(v, v).main()
        # [1:-1] strips the surrounding brackets of the list repr.  The result
        # is escaped as well: it used to be interpolated raw, as was xmmc.
        mysql.sql_change_msg("""insert into user_history_module_keywords (xmmc,gnmc,gnms,line) value("%s" ,"%s", "%s", "%s")""" % (
            safe_xmmc, safe_name, escape_string(str(keywords_y)[1:-1]), safe_line))
-
-
def _collect_dimensions(data):
    """Concatenate the cell text of every dimension section of a workbook.

    A row whose first cell is filled starts a new section (text is taken from
    the second cell onwards); rows with an empty first cell continue the
    current section (text is taken from the fourth cell onwards).

    Returns:
        tuple: ``(str_dict, er_title)`` where ``str_dict`` maps column codes
        (via ``wdys1``) to the section text and ``er_title`` is the set of
        second-column values found inside the "功能模块" section.
    """
    str_dict = {}
    er_title = set()
    gnmk_str = []
    title = ""
    join_str = ""
    for d in data:
        starts_section = pd.notnull(d[0])
        if starts_section:
            title = d[0]
            join_str = ""
            cells = d[1:]
        else:
            cells = d[3:]

        in_modules = title == "功能模块"
        if in_modules and pd.notnull(d[1]):
            er_title.add(d[1])

        for i in cells:
            if pd.isnull(i):
                continue
            join_str += str(i)
            # The header cell '功能描述' of the titled row is not module content.
            if in_modules and not (starts_section and i == '功能描述'):
                # str() so that numeric cells do not break the join below.
                gnmk_str.append(str(i))

        # join_str already holds the whole section so far.  Appending it to
        # the previously stored value (as was done for continuation rows)
        # duplicated the earlier text.
        str_dict[wdys1.get(title)] = join_str

    str_dict['gnmk'] = ",".join(gnmk_str)
    return str_dict, er_title


def _sql_text(value):
    """Escape ``value`` for a double-quoted SQL placeholder.

    Empty values yield ``None``.
    NOTE: ``None`` is rendered by ``%s`` as the literal text ``None``; this is
    kept unchanged because readers of these tables may rely on it.
    """
    return escape_string(value) if value else None


def project_check(data_list, line):
    """Import project workbooks into the history tables.

    For every entry of ``data_list`` the workbook at ``entry[1]`` is read, its
    dimension text is stored in ``user_history_data``, the keywords of each
    dimension in ``user_history_keywords``, and the functional modules are
    handed to ``gong_neng_mo_kuai``.

    Args:
        data_list: iterable of tuples whose second item is an Excel path.
        line: batch label written to the ``line`` column of every row.
    """
    mysql = mysql_pool.ConnMysql()

    # Column order of the two INSERT statements below.
    data_fields = (
        "xzwt", "xtjc", "xmmb", "yqjx", "jsxq", "sjxq", "aqxq", "ywly", "hxyw",
        "ywxq", "ywxt", "jscj", "yhfw", "mbqt", "jsnr", "gnmk", "sjgx", "znys",
        "sbdw", "ssdq", "ysnd",
    )
    keyword_fields = data_fields[:18]  # xzwt .. znys

    for dl in data_list:
        print(dl)
        df = pd.read_excel(dl[1])

        # The header of the second column carries the project name.  The
        # report-type suffixes are stripped unconditionally: the former
        # `"a" or "b" or "c" in xmmc` test was always true anyway.
        xmmc = str(df.keys()[1])
        for suffix in ('可研报告', '可研性报告', '可行性研究报告'):
            xmmc = xmmc.replace(suffix, '')

        data = df.values
        str_dict, er_title = _collect_dimensions(data)
        safe_xmmc = escape_string(xmmc)

        data_values = tuple(_sql_text(str_dict.get(f)) for f in data_fields)
        mysql.sql_change_msg(
            """insert into user_history_data (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys,sbdw,ssdq,ysnd,line,remark) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s","%s","%s","%s","%s","%s")"""
            % ((safe_xmmc,) + data_values + (line, "")))

        # Keywords per dimension; comparing a text with itself is only used
        # to obtain its keyword list.
        project_gjc = {}
        for w in wdys2:
            content = str_dict.get(w)
            if content:
                _, _, keywords_y = cosin_similarity.CosineSimilarity(content, content).main()
                project_gjc[w] = keywords_y

        # [1:-1] strips the brackets of the list repr.  Keywords and xmmc are
        # escaped here too; they used to be interpolated raw.
        keyword_values = tuple(
            escape_string(str(project_gjc.get(f))[1:-1]) if project_gjc.get(f) else None
            for f in keyword_fields
        )
        mysql.sql_change_msg(
            """insert into user_history_keywords (xmmc, xzwt, xtjc, xmmb, yqjx, jsxq, sjxq, aqxq, ywly, hxyw, ywxq, ywxt, jscj, yhfw, mbqt, jsnr, gnmk, sjgx, znys, line) value ("%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")"""
            % ((safe_xmmc,) + keyword_values + (line,)))

        gong_neng_mo_kuai(xmmc, mysql, dl, data, er_title, line)
-
-
-
def update_desc():
    """Fill ``glm_desc`` of ``user_history_module_data`` for one fixed project.

    Each module description (``gnms``) of the project is sent to
    ``glm_utils.CallContentResultNew`` and the returned text is written back
    to the row and printed.
    """
    mysql = mysql_pool.ConnMysql()
    module_list = mysql.sql_select_many("""select id, gnms from user_history_module_data where xmmc = '丽水市城市管理指挥中心信息系统(一期)项目'""")
    for module in module_list:
        # Extract the information through ChatGLM.
        content = glm_utils.CallContentResultNew(module.get("gnms"))

        # The model output is free text and may contain quotes or
        # backslashes; escape it before embedding it in the statement.
        # An empty result is still written as the literal text None.
        glm_desc = escape_string(str(content)) if content else None
        mysql.sql_change_msg(
            """UPDATE user_history_module_data SET glm_desc = "%s" WHERE id = %d""" % (
                glm_desc,
                module.get("id")))
        print(content)
-
-
def update_desc1():
    """Fill ``glm_desc`` of ``gnms_gml`` for one fixed project.

    Each module description (``gnms``) of the project is sent to
    ``glm_utils.CallContentResultNew`` and the returned text is written back
    to the row and printed.
    """
    mysql = mysql_pool.ConnMysql()
    module_list = mysql.sql_select_many("""select id, gnms from gnms_gml where xmmc = '丽水花园云(城市大脑)数字驾驶舱项目'""")
    for module in module_list:
        # Extract the information through ChatGLM.
        content = glm_utils.CallContentResultNew(module.get("gnms"))

        # The model output is free text and may contain quotes or
        # backslashes; escape it before embedding it in the statement.
        # An empty result is still written as the literal text None.
        glm_desc = escape_string(str(content)) if content else None
        mysql.sql_change_msg(
            """UPDATE gnms_gml SET glm_desc = "%s" WHERE id = %d""" % (
                glm_desc,
                module.get("id")))
        print(content)
-
-
def info_word_project():
    """Export pairwise "construction content" prompts as JSON lines.

    The ``jsnr`` text of one fixed project is split on ``-----》`` and every
    segment is paired with every segment of a fixed list of other projects.
    Each pair becomes one instruction record (empty ``output``) written as a
    line of ``其他-建设内容.json``.
    """
    mysql = mysql_pool.ConnMysql()
    module_list1 = mysql.sql_select_many(
        """select jsnr from user_history_data where xmmc = '2023年丽水市云和县数字法治门户建设项目' """)
    # NOTE(review): '浙江省环境遥感监测业务智治' and '平台项目' look like one
    # project name split in two - confirm against the table.
    module_list2 = mysql.sql_select_many(
        """select jsnr from user_history_data where xmmc IN ('浙江省第二监狱重点罪犯管控模型项目',
'浙江省农村水电站管理数字化应用',
'浙江省河湖库保护数字化应用建设项目',
'浙江省环境遥感监测业务智治',
'平台项目',
'浙江林业智媒平台项目',
'未来e家应用建设方案',
'浙江省智慧林业云平台升级改造项目建设方案',
'为侨服务“全球通”平台二期建设项目')""")

    def segments(rows):
        # Rows whose jsnr is NULL have nothing to split and are skipped.
        parts = []
        for row in rows:
            if row["jsnr"] is not None:
                parts.extend(row["jsnr"].split('-----》'))
        return parts

    segments_b = segments(module_list2)
    json_objects = []
    for jsnr1Ele in segments(module_list1):
        for jsnr2Ele in segments_b:
            json_objects.append({
                "instruction": "现在你是一个政府提案的查重检查人员,给定两段话A和B:让我们一步步思考并判断是否相似。请以相似度从高、中、低三个等级进行评价,并给出理由。",
                "input": "A:%s\nB:%s" % (jsnr1Ele, jsnr2Ele),
                "output": ""
            })

    # ensure_ascii=False emits raw Chinese text, so the encoding must be
    # explicit instead of depending on the platform default.
    with open('其他-建设内容.json', 'w', encoding='utf-8') as f:
        for json_obj in json_objects:
            # One JSON object per line.
            f.write(json.dumps(json_obj, ensure_ascii=False) + '\n')
-
-
-
def info_word1():
    """Export pairwise "functional module" prompts as JSON lines.

    Every module description (``gnms``) of one fixed project is paired with
    every module description of a fixed list of other projects. Each pair
    becomes one instruction record (empty ``output``) written as a line of
    ``其他-功能模块对比.json``.
    """
    mysql = mysql_pool.ConnMysql()
    # module_list1 = mysql.sql_select_many("""select gnms from user_history_module_data where xmmc = '莲智社区' """)
    # module_list2 = mysql.sql_select_many("""select gnms from user_history_module_data where xmmc IN ('古堰画乡智慧客厅项目—未来社区智慧服务平台', '未来e家')""")
    module_list1 = mysql.sql_select_many("""select gnms from user_history_module_data where xmmc = '丽水市遂昌县政法委数字法治综合应用' """)
    # NOTE(review): '浙江省环境遥感监测业务智治' and '平台项目' look like one
    # project name split in two - confirm against the table.
    module_list2 = mysql.sql_select_many("""select gnms from user_history_module_data where xmmc IN ('浙江省第二监狱重点罪犯管控模型项目',
'浙江省农村水电站管理数字化应用',
'浙江省河湖库保护数字化应用建设项目',
'浙江省环境遥感监测业务智治',
'平台项目',
'浙江林业智媒平台项目',
'未来e家应用建设方案',
'浙江省智慧林业云平台升级改造项目建设方案',
'为侨服务“全球通”平台二期建设项目')""")

    json_objects = []
    for module_info1 in module_list1:
        for module_info2 in module_list2:
            # Named `pair_text` rather than `str` to avoid shadowing the builtin.
            pair_text = "A:%s\nB:%s" % (module_info1["gnms"], module_info2["gnms"])
            json_objects.append({
                "instruction": "现在你是一个政府提案的查重检查人员,给定两段话A和B:让我们一步步思考并判断是否相似。请以相似度从高、中、低三个等级进行评价,并给出理由。",
                "input": pair_text,
                "output": ""
            })

    # ensure_ascii=False emits raw Chinese text, so the encoding must be
    # explicit instead of depending on the platform default.
    with open('其他-功能模块对比.json', 'w', encoding='utf-8') as f:
        for json_obj in json_objects:
            # One JSON object per line.
            f.write(json.dumps(json_obj, ensure_ascii=False) + '\n')
-
-
def info_word_project_yw():
    """Export pairwise "construction content" prompts for related projects.

    The ``jsnr`` text of one fixed project is split on ``-----》`` and every
    segment is paired with every segment of a fixed list of digital-rule-of-law
    projects (the list includes the project itself). Each pair becomes one
    instruction record (empty ``output``) written as a line of
    ``其他-建设内容.json``.
    """
    mysql = mysql_pool.ConnMysql()
    module_list1 = mysql.sql_select_many(
        """select jsnr from user_history_data where xmmc = '2023年丽水市云和县数字法治门户建设项目' """)
    module_list2 = mysql.sql_select_many(
        """select jsnr from user_history_data where xmmc IN ('2023年丽水市云和县数字法治门户建设项目', '浙江省司法厅全域数字法治监督应用系统(一期)', '丽水市遂昌县政法委数字法治综合应用', '丽水市龙泉市政法委法治龙泉门户', '庆元县数字法治综合门户')""")

    def segments(rows):
        # Rows whose jsnr is NULL have nothing to split and are skipped.
        parts = []
        for row in rows:
            if row["jsnr"] is not None:
                parts.extend(row["jsnr"].split('-----》'))
        return parts

    segments_b = segments(module_list2)
    json_objects = []
    for jsnr1Ele in segments(module_list1):
        for jsnr2Ele in segments_b:
            json_objects.append({
                "instruction": "现在你是一个政府提案的查重检查人员,给定两段话A和B:让我们一步步思考并判断是否相似。请以相似度从高、中、低三个等级进行评价,并给出理由。",
                "input": "A:%s\nB:%s" % (jsnr1Ele, jsnr2Ele),
                "output": ""
            })

    # ensure_ascii=False emits raw Chinese text, so the encoding must be
    # explicit instead of depending on the platform default.
    with open('其他-建设内容.json', 'w', encoding='utf-8') as f:
        for json_obj in json_objects:
            # One JSON object per line.
            f.write(json.dumps(json_obj, ensure_ascii=False) + '\n')
-
-
if __name__ == "__main__":
    # First export the module-comparison prompts ...
    info_word1()
    print("ok.......")

    # ... then import every workbook found in the folder, one file per call.
    path = r"/Users/kebobo/Downloads/丽水/未来社区"
    for file in os.listdir(path):
        # Skip the macOS Finder metadata file.
        if file == '.DS_Store':
            continue
        data_list = [(0, "%s/%s" % (path, file), "")]
        project_check(data_list, "2024-07-27-数字法治")
        print("已存入************************************* %s" % file)
-
- """
- 建设目标,业务功能
-
- gnmk_str = []
- for d in data:
- if pd.notnull(d[0]):
- title = d[0]
- if title == "功能模块":
- er_title.add(d[dup_file_test])
- join_str = ""
- for i in d[dup_file_test:]:
- if pd.notnull(i):
- join_str += i
- if title == "功能模块":
- gnmk_str.append(i)
- str_dict[wdys1.get(title)] = join_str
- else:
- if title == "功能模块":
- er_title.add(d[dup_file_test])
- for i in d[dup_file_test:]:
- if pd.notnull(i):
- join_str += i
- if title == "功能模块":
- gnmk_str.append(i)
- str_dict[wdys1.get(title)] = str_dict.get(wdys1.get(title)) + join_str
- gnmk = "".join(gnmk_str)
-
-
- """
-
|