|
import os

# Allow multiple OpenMP runtimes to coexist in this process.
# NOTE(review): this is the usual workaround for the libomp "duplicate library"
# abort when torch/modelscope load conflicting OpenMP builds — confirm it is
# still required in the deployment environment.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-
-
# Strip newline characters from a piece of text.
def replace_newlines(text, new_line=''):
    """Replace every newline variant in *text* with *new_line*.

    Handles ``\\r\\n`` (checked first so it is not split into two
    replacements), then lone ``\\r`` and ``\\n``.
    """
    for token in ('\r\n', '\r', '\n'):
        text = text.replace(token, new_line)
    return text
-
-
# Compute text similarity with a BERT-based ModelScope pipeline.
class Bert_nlp(object):
    """Sentence-similarity scorer backed by a ModelScope pipeline.

    Supported ``nlp_type`` values:
      * ``"structbert"`` -- sentence-similarity classification model; only
        compares the query against the FIRST candidate sentence.
      * ``"corom"``      -- sentence-embedding model; scores the query
        against every candidate sentence.
    """

    def __init__(self, nlp_type):
        """Load the pipeline for *nlp_type*.

        Raises:
            ValueError: if *nlp_type* is not a supported backend (previously
                this surfaced later as an AttributeError in ``main``).
        """
        self.nlp_type = nlp_type
        if nlp_type == "structbert":
            model_id = "damo/nlp_structbert_sentence-similarity_chinese-large"
            self.semantic_cls = pipeline(Tasks.sentence_similarity, model_id)
        elif nlp_type == "corom":
            model_id = "damo/nlp_corom_sentence-embedding_chinese-tiny"
            self.semantic_cls = pipeline(Tasks.sentence_embedding, model=model_id)
        else:
            # Fail fast on construction instead of crashing in main().
            raise ValueError(f"unsupported nlp_type: {nlp_type!r}")

    def main(self, content1, contents):
        """Score *content1* against the candidate sentences in *contents*.

        Args:
            content1: the query sentence.
            contents: list of candidate sentences to compare against.

        Returns:
            Tuple ``(score, content1, best_match, best_index)``.
            When *contents* is empty, returns ``(0.0, content1, "", -1)``.
        """
        score = 0.0
        # BUG FIX: idx was previously unassigned on the structbert path,
        # making the final return raise NameError.
        idx = -1
        if not contents:
            return score, content1, "", idx
        if self.nlp_type == "structbert":
            # This backend classifies a single pair, so only contents[0]
            # is considered.
            result = self.semantic_cls(input=(content1, contents[0]))
            print(result)
            labels = result["labels"]
            # Score for the "similar" class (label "1").
            acq = labels.index("1")
            score = result["scores"][acq]
            idx = 0
        elif self.nlp_type == "corom":
            inputs = {
                "source_sentence": [
                    # Embedding model is fed a single-line query.
                    replace_newlines(content1)
                ],
                "sentences_to_compare": contents,
            }
            result = self.semantic_cls(input=inputs)
            print(result)

            arr = result["scores"]
            score = max(arr)
            idx = arr.index(score)

        return score, content1, contents[idx], idx
-
-
-
-
if __name__ == "__main__":
    # Quick manual smoke check of the corom embedding backend.
    query = """主要功能为快速进行学生课堂评价及小组评价"""
    candidates = [
        """用户通过建设单位账户进入建设单位门户,建设单位门户主要展示本单位项目信息、通知公告与政策文件栏、待办事项栏、本单位进行中项目栏模块。""",
        """主要功能为快速进行学生课堂评价及小组评价""",
    ]
    engine = Bert_nlp("corom")
    print(engine.main(query, candidates))
|