"""Unit tests for the phrase-exemption, deleted-article and progress helpers of ``main``."""
import unittest

import pandas as pd

from mycode import main

# Column names shared by every rules DataFrame built in this module.
_RULE_COLUMNS = ('错误表述', '建议修改词语', '错误分类')

# A truncated fixed expression and the complete expression that contains it.
_SHORT_PHRASE = '深入贯彻中央八项规定精神'
_FULL_PHRASE = '深入贯彻中央八项规定精神学习教育'

_URL_A = 'https://a.test/1'
_URL_B = 'https://b.test/2'
_URL_SHARED = 'https://same.test/1'

# Page body used by the stub fetchers for an article that is still online.
_LIVE_PAGE = '正常文章内容'


def _rules_frame(*triples):
    """Build a rules DataFrame from (error phrase, suggestion, category) triples."""
    return pd.DataFrame(
        [dict(zip(_RULE_COLUMNS, triple)) for triple in triples]
    )


def _result_row(seq, account, tag, url):
    """Return a fresh seven-field result row: number, account, title, error, advice, category, URL."""
    return [
        seq,
        '公众号' + account,
        '标题' + tag,
        '错误表述' + tag,
        '建议' + tag,
        '分类' + tag,
        url,
    ]


def _live_page(url):
    """Stub fetcher that answers every URL with an ordinary article body."""
    return _LIVE_PAGE


def _progress_recorder():
    """Return ``(events, callback)``; the callback appends ``(completed, total)`` to ``events``."""
    events = []

    def record(completed, total):
        events.append((completed, total))

    return events, record


class PhraseExemptionRulesTest(unittest.TestCase):
    """Tests for ``build_phrase_exemption_rules`` and ``should_skip_error_phrase``."""

    def test_builds_exemption_rules_from_suggestion_phrases(self):
        # Each error phrase is expected to map to a list holding its suggestion.
        frame = _rules_frame(
            (_SHORT_PHRASE, _FULL_PHRASE, '固定表述错误'),
            ('“两学一做”学习', '“两学一做”学习教育', '固定表述错误'),
        )

        built = main.build_phrase_exemption_rules(frame)

        expectations = (
            (_SHORT_PHRASE, [_FULL_PHRASE]),
            ('“两学一做”学习', ['“两学一做”学习教育']),
        )
        for phrase, suggestions in expectations:
            self.assertEqual(built[phrase], suggestions)

    def test_skips_short_error_phrase_when_full_correct_phrase_exists(self):
        # The text carries the complete phrase, so the short match must be skipped.
        verdict = main.should_skip_error_phrase(
            _SHORT_PHRASE,
            '现开展深入贯彻中央八项规定精神学习教育相关工作。',
            {_SHORT_PHRASE: [_FULL_PHRASE]},
        )

        self.assertTrue(verdict)

    def test_does_not_skip_when_only_short_error_phrase_exists(self):
        # Only the truncated phrase is present, so it must still be reported.
        verdict = main.should_skip_error_phrase(
            _SHORT_PHRASE,
            '文章仅写到深入贯彻中央八项规定精神,没有写完整。',
            {_SHORT_PHRASE: [_FULL_PHRASE]},
        )

        self.assertFalse(verdict)


class DeletedWechatContentFilterTest(unittest.TestCase):
    """Tests for ``filter_deleted_wechat_rows`` driven by stub page fetchers."""

    def test_filters_rows_when_url_page_says_deleted(self):
        # Only the first URL serves the "deleted by publisher" notice.
        rows = [
            _result_row(1, 'A', 'A', _URL_A),
            _result_row(2, 'B', 'B', _URL_B),
        ]

        def fetch_page(url):
            return '该内容已被发布者删除' if url == _URL_A else _LIVE_PAGE

        kept = main.filter_deleted_wechat_rows(rows, fetch_html=fetch_page)

        # Compare against a freshly built row rather than rows[1].
        self.assertEqual(kept, [_result_row(2, 'B', 'B', _URL_B)])

    def test_keeps_rows_when_url_page_is_not_deleted(self):
        rows = [_result_row(1, 'A', 'A', _URL_A)]

        def fetch_page(url):
            return '正文还在'

        kept = main.filter_deleted_wechat_rows(rows, fetch_html=fetch_page)

        self.assertEqual(kept, rows)

    def test_checks_same_url_only_once(self):
        # Two rows share one URL; the fetcher should be invoked a single time.
        rows = [
            _result_row(1, 'A', 'A', _URL_SHARED),
            _result_row(2, 'A', 'B', _URL_SHARED),
        ]
        fetched = []

        def recording_fetch(url):
            fetched.append(url)
            return _LIVE_PAGE

        kept = main.filter_deleted_wechat_rows(rows, fetch_html=recording_fetch)

        self.assertEqual(kept, rows)
        self.assertEqual(fetched, [_URL_SHARED])

    def test_keeps_rows_when_fetch_fails(self):
        # A fetcher that raises must not cause the row to be dropped.
        rows = [_result_row(1, 'A', 'A', _URL_A)]

        def failing_fetch(url):
            raise TimeoutError(url)

        kept = main.filter_deleted_wechat_rows(rows, fetch_html=failing_fetch)

        self.assertEqual(kept, rows)

    def test_reports_url_check_progress(self):
        # Three rows but only two distinct URLs: two progress reports expected.
        rows = [
            _result_row(1, 'A', 'A', _URL_A),
            _result_row(2, 'B', 'B', _URL_B),
            _result_row(3, 'A', 'C', _URL_A),
        ]
        events, record = _progress_recorder()

        main.filter_deleted_wechat_rows(
            rows,
            fetch_html=_live_page,
            progress_callback=record,
        )

        self.assertEqual(events[-1], (2, 2))
        self.assertEqual(len(events), 2)


class WechatAnalysisProgressTest(unittest.TestCase):
    """Tests for the progress reporting of ``ana_wechat``."""

    def test_reports_rule_scan_progress(self):
        # Two rules, one article that contains only the first error phrase.
        rules_df = _rules_frame(
            ('错误A', '修改A', '分类A'),
            ('错误B', '修改B', '分类B'),
        )
        article = {
            'nickname': '公众号A',
            'title': '标题A',
            'content': '这里包含错误A',
            'content_url': _URL_A,
        }
        articles_df = pd.DataFrame([article])
        events, record = _progress_recorder()

        rows = main.ana_wechat(
            rules_df=rules_df,
            articles_df=articles_df,
            progress_callback=record,
            fetch_html=_live_page,
        )

        self.assertEqual(len(rows), 1)
        self.assertEqual(events[-1], (2, 2))


if __name__ == '__main__':
    unittest.main()