import mysql_pool
import heapq
import uuid
import os

# Set before the modelscope imports below; keep this ordering.
# NOTE(review): presumably works around the "duplicate OpenMP runtime" abort
# raised by some torch/MKL builds - confirm before removing.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks


def _top_matches(scores, k=3):
    """Return up to *k* ``(index, score)`` pairs with the highest scores, best first.

    Ranking ``(index, score)`` pairs instead of bare scores keeps every result
    tied to its own position, so candidates with equal scores are not collapsed
    onto the first one (which is what ``scores.index(value)`` would do).
    """
    return heapq.nlargest(k, enumerate(scores), key=lambda pair: pair[1])


def check_demo():
    """Record, for every selected project, its three most similar other projects.

    Reads all rows of ``test_pro_info_new`` matching the ``super_unit`` filter,
    scores each project's ``base_proj_intro`` against the intros of the other
    rows with the sentence-embedding pipeline, and inserts the three
    best-scoring pairs into ``test_pro_check``. All rows written by one call
    share the same freshly generated ``batch_no``.

    Returns:
        None. Results are written to the database; each raw pipeline result is
        also printed.
    """
    batch_no = str(uuid.uuid4())
    mysql = mysql_pool.ConnMysql()
    # All project records that take part in the comparison.
    data = mysql.sql_select_many("""select * from test_pro_info_new where super_unit LIKE '%农%'""")
    if not data:
        # Nothing to compare; skip loading the model altogether.
        return

    # Load the sentence-embedding model once and reuse it for every project.
    model_id = "damo/nlp_corom_sentence-embedding_chinese-tiny"
    semantic_cls = pipeline(Tasks.sentence_embedding, model=model_id)

    # NOTE(review): values are interpolated straight into the SQL text, so an
    # intro or name containing a double quote breaks the statement. Switch to
    # bound parameters if mysql_pool's sql_change_msg supports them.
    insert_sql = """insert into test_pro_check (pro_name, pro_info, check_pro_name, check_pro_info, batch_no, score, pro_area, check_pro_area, pro_set_year, check_pro_set_year, create_time) value("%s" ,"%s", "%s", "%s", "%s", "%f", "%s", "%s", "%s", "%s", now())"""

    # NOTE: an earlier revision wrapped each project in try/except and, on
    # failure, inserted a row with score 0 and no matched project. That
    # fallback is disabled, so any exception aborts the whole run.
    for pro in data:
        pro_intro = pro.get("base_proj_intro")

        # Candidates are every record whose intro differs from this project's
        # (this also excludes the project itself).
        # NOTE: an earlier revision additionally required the first four
        # characters of base_area_code to match; that filter is disabled.
        candidates = [ap for ap in data if ap.get("base_proj_intro") != pro_intro]
        if not candidates:
            # No other intro to compare against; nothing to record.
            continue

        # Sentences handed to the model; kept index-aligned with `candidates`
        # so a score index maps straight back to its source record.
        pro_info_list = [
            str(ap.get("base_proj_intro")).replace('\n', '') for ap in candidates
        ]

        inputs = {
            "source_sentence": [pro_intro],
            "sentences_to_compare": pro_info_list,
        }
        result = semantic_cls(input=inputs)
        print(result)

        for idx, score in _top_matches(result["scores"], 3):
            matched = candidates[idx]
            mysql.sql_change_msg(
                insert_sql % (
                    pro.get("base_proj_name"),
                    pro_intro,
                    matched.get("base_proj_name"),
                    pro_info_list[idx],
                    batch_no,
                    score,
                    pro.get("base_area_name"),
                    matched.get("base_area_name"),
                    pro.get("base_proj_set_year"),
                    matched.get("base_proj_set_year"),
                )
            )


if __name__ == "__main__":
    check_demo()