import os

# Allow duplicate OpenMP runtimes to coexist (common PyTorch/MKL conflict on macOS).
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks


def replace_newlines(text, new_line=''):
    """Replace every newline variant in *text* with *new_line*.

    Handles ``\\r\\n`` first so a Windows line ending is not turned into two
    separate replacements by the ``\\r`` and ``\\n`` passes.
    """
    return text.replace('\r\n', new_line).replace('\r', new_line).replace('\n', new_line)


class Bert_nlp(object):
    """Chinese text similarity via a ModelScope BERT pipeline.

    Two backends are supported:
      * ``"structbert"`` - pairwise sentence-similarity classifier; only the
        first candidate sentence is scored.
      * ``"corom"`` - sentence-embedding model; scores the source sentence
        against every candidate.
    """

    def __init__(self, nlp_type):
        """Build the underlying pipeline for *nlp_type* ("structbert" or "corom")."""
        self.nlp_type = nlp_type
        if nlp_type == "structbert":
            model_id = "damo/nlp_structbert_sentence-similarity_chinese-large"
            self.semantic_cls = pipeline(Tasks.sentence_similarity, model_id)
        elif nlp_type == "corom":
            model_id = "damo/nlp_corom_sentence-embedding_chinese-tiny"
            self.semantic_cls = pipeline(Tasks.sentence_embedding, model=model_id)

    def main(self, content1, contents):
        """Score *content1* against the candidate sentences *contents*.

        Returns a tuple ``(score, content1, best_match, best_index)``.
        When *contents* is empty (or the backend is unrecognized) the result
        is ``(0.0, content1, "", -1)``.

        Bug fix vs. the original: the structbert branch previously fell
        through to a return statement that referenced ``idx``, which was only
        bound in the corom branch, raising ``NameError``. The structbert
        branch now returns its own result (it always compares against
        ``contents[0]``, i.e. index 0).
        """
        score = 0.0
        if not contents:
            return score, content1, "", -1
        if self.nlp_type == "structbert":
            # StructBERT scores a single pair; only the first candidate is used.
            result = self.semantic_cls(input=(content1, contents[0]))
            print(result)
            labels = result["labels"]
            acq = labels.index("1")
            score = result["scores"][acq]
            return score, content1, contents[0], 0
        elif self.nlp_type == "corom":
            # Strip newlines from the source sentence before embedding.
            # NOTE(review): candidates are not newline-stripped — presumably
            # intentional, but worth confirming against the model's tokenizer.
            inputs = {
                "source_sentence": [
                    replace_newlines(content1)
                ],
                "sentences_to_compare": contents
            }
            result = self.semantic_cls(input=inputs)
            print(result)
            arr = result["scores"]
            score = max(arr)
            idx = arr.index(score)
            return score, content1, contents[idx], idx
        # Unknown backend: no pipeline was invoked, so report "no match".
        return score, content1, "", -1


if __name__ == "__main__":
    content1 = """主要功能为快速进行学生课堂评价及小组评价"""
    content2 = ["""用户通过建设单位账户进入建设单位门户,建设单位门户主要展示本单位项目信息、通知公告与政策文件栏、待办事项栏、本单位进行中项目栏模块。""",
                """主要功能为快速进行学生课堂评价及小组评价"""
                ]
    nlp = Bert_nlp("corom")
    print(nlp.main(content1, content2))