丽水查重代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

66 lines
2.4KB

  1. import os
  2. os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
  3. from modelscope.pipelines import pipeline
  4. from modelscope.utils.constant import Tasks
  5. # 替换换行字符
  6. def replace_newlines(text, new_line=''):
  7. # 替换所有类型的换行符
  8. return text.replace('\r\n', new_line).replace('\r', new_line).replace('\n', new_line)
  9. # 使用bert计算文本相识度
  10. class Bert_nlp(object):
  11. def __init__(self, nlp_type):
  12. self.nlp_type = nlp_type
  13. if nlp_type == "structbert":
  14. model_id = "damo/nlp_structbert_sentence-similarity_chinese-large"
  15. self.semantic_cls = pipeline(Tasks.sentence_similarity, model_id)
  16. elif nlp_type == "corom":
  17. # , sequence_length=1024 /Users/kebobo/.cache/modelscope/hub/damo/nlp_corom_sentence-embedding_chinese-tiny
  18. model_id = "damo/nlp_corom_sentence-embedding_chinese-tiny"
  19. self.semantic_cls = pipeline(Tasks.sentence_embedding, model=model_id)
  20. def main(self, content1, contents):
  21. # if content1 is None or content1 == "None":
  22. # return 0, "", "", -1
  23. score = 0.0
  24. if len(contents) == 0:
  25. return score, content1, "", -1
  26. if self.nlp_type == "structbert":
  27. result = self.semantic_cls(input=(content1, contents[0]))
  28. print(result)
  29. labels = result["labels"]
  30. acq = labels.index("1")
  31. score = result["scores"][acq]
  32. elif self.nlp_type == "corom":
  33. inputs = {
  34. "source_sentence": [
  35. replace_newlines(content1)
  36. ],
  37. "sentences_to_compare": contents
  38. }
  39. result = self.semantic_cls(input=inputs)
  40. print(result)
  41. arr = result["scores"]
  42. score = max(arr)
  43. idx = arr.index(score)
  44. return score, content1, contents[idx], idx
  45. if __name__ == "__main__":
  46. content1 = """主要功能为快速进行学生课堂评价及小组评价"""
  47. content2 = ["""用户通过建设单位账户进入建设单位门户,建设单位门户主要展示本单位项目信息、通知公告与政策文件栏、待办事项栏、本单位进行中项目栏模块。""",
  48. """主要功能为快速进行学生课堂评价及小组评价"""
  49. ]
  50. nlp = Bert_nlp("corom")
  51. print(nlp.main(content1, content2))