SensitiveDetectionProvider.cs
6.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
using NCC.Dependency;
using NCC.Reflection;
using NCC.Templates.Extensions;
using Microsoft.Extensions.Caching.Distributed;
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
namespace NCC.SensitiveDetection
{
/// <summary>
/// Default sensitive-word provider. Loads the word list (from the distributed cache or,
/// on a cache miss, from the <c>sensitive-words.txt</c> resource embedded in the entry assembly)
/// and offers validation and masking of text against that list.
/// </summary>
[SuppressSniffer]
public class SensitiveDetectionProvider : ISensitiveDetectionProvider
{
    /// <summary>
    /// Key under which the raw word-list text is stored in the distributed cache.
    /// </summary>
    private const string DISTRIBUTED_KEY = "SENSITIVE:WORDS";

    /// <summary>
    /// Line separators accepted in the word list. "\r\n" is listed first so that a Windows
    /// line ending is consumed as a single separator.
    /// </summary>
    private static readonly string[] LineSeparators = { "\r\n", "\n", "\r" };

    /// <summary>
    /// Distributed cache used to store the word list.
    /// </summary>
    private readonly IDistributedCache _distributedCache;

    /// <summary>
    /// Creates the provider.
    /// </summary>
    /// <param name="distributedCache">Distributed cache used to store and retrieve the word list.</param>
    public SensitiveDetectionProvider(IDistributedCache distributedCache)
    {
        _distributedCache = distributedCache;
    }

    /// <summary>
    /// Returns all sensitive words, one entry per non-empty line of the word list.
    /// </summary>
    /// <returns>The sensitive words.</returns>
    /// <exception cref="InvalidOperationException">
    /// The cache is empty and the embedded <c>sensitive-words.txt</c> resource does not exist.
    /// </exception>
    public async Task<IEnumerable<string>> GetWordsAsync()
    {
        // Fast path: the raw word list is already in the cache.
        var wordsCached = await _distributedCache.GetStringAsync(DISTRIBUTED_KEY);
        if (wordsCached != null)
        {
            return SplitWords(wordsCached);
        }

        var entryAssembly = Reflect.GetEntryAssembly();
        var resourceName = $"{Reflect.GetAssemblyName(entryAssembly)}.sensitive-words.txt";

        // Read the embedded resource.
        string content;
        using (var readStream = entryAssembly.GetManifestResourceStream(resourceName))
        {
            if (readStream == null)
            {
                // Fail with an actionable message instead of a NullReferenceException.
                throw new InvalidOperationException(
                    $"Embedded resource '{resourceName}' was not found in the entry assembly.");
            }

            var buffer = new byte[readStream.Length];
            var totalRead = 0;

            // A single ReadAsync call may return fewer bytes than requested, so keep reading
            // until the buffer is full or the stream ends.
            while (totalRead < buffer.Length)
            {
                var read = await readStream.ReadAsync(buffer.AsMemory(totalRead, buffer.Length - totalRead));
                if (read == 0)
                {
                    break;
                }

                totalRead += read;
            }

            content = Encoding.UTF8.GetString(buffer, 0, totalRead);
        }

        // Cache the raw text; it is split again on every read.
        await _distributedCache.SetStringAsync(DISTRIBUTED_KEY, content);

        return SplitWords(content);
    }

    /// <summary>
    /// Checks whether the text is free of sensitive words.
    /// </summary>
    /// <param name="text">Text to check.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="text"/> is null, empty, whitespace, or contains no sensitive word;
    /// otherwise <c>false</c>.
    /// </returns>
    public async Task<bool> VaildedAsync(string text)
    {
        // Null, empty and whitespace-only input is never validated.
        if (string.IsNullOrWhiteSpace(text))
        {
            return true;
        }

        var foundSets = await FoundSensitiveWordsAsync(text);
        return foundSets.Count == 0;
    }

    /// <summary>
    /// Masks every occurrence of a sensitive word in the text.
    /// </summary>
    /// <param name="text">Text to mask.</param>
    /// <param name="transfer">Character written over each character of a sensitive word.</param>
    /// <returns>
    /// The masked text; the original text when no sensitive word is found; <c>null</c> when
    /// <paramref name="text"/> is null, empty or whitespace.
    /// </returns>
    public async Task<string> ReplaceAsync(string text, char transfer = '*')
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return default;
        }

        var foundSets = await FoundSensitiveWordsAsync(text);

        // Nothing to mask: hand the input back untouched.
        if (foundSets.Count == 0)
        {
            return text;
        }

        // NOTE(review): positions are located in text.Render() but applied to the original text.
        // If Render() can change the text, the positions will not line up - confirm against Render().
        var masked = new StringBuilder(text);

        foreach (var kv in foundSets)
        {
            var wordLength = kv.Key.Length;

            foreach (var start in kv.Value)
            {
                for (var offset = 0; offset < wordLength; offset++)
                {
                    masked[start + offset] = transfer;
                }
            }
        }

        return masked.ToString();
    }

    /// <summary>
    /// Finds every non-overlapping occurrence of each sensitive word in the rendered text.
    /// </summary>
    /// <param name="text">Text to scan; it is passed through <c>Render()</c> before scanning.</param>
    /// <returns>
    /// A map from each sensitive word that occurs at least once to the zero-based start positions
    /// of its occurrences, in ascending order. Words that do not occur have no entry.
    /// </returns>
    private async Task<Dictionary<string, List<int>>> FoundSensitiveWordsAsync(string text)
    {
        // Render configuration templates first.
        var realText = text.Render();

        var sensitiveWords = await GetWordsAsync();
        var foundSets = new Dictionary<string, List<int>>();

        foreach (var sensitiveWord in sensitiveWords)
        {
            // An empty word would match at every position without advancing, and a word that is
            // listed twice must not have its positions recorded a second time.
            if (string.IsNullOrEmpty(sensitiveWord) || foundSets.ContainsKey(sensitiveWord))
            {
                continue;
            }

            List<int> positions = null;
            var searchFrom = 0;
            int findIndex;

            // Ordinal search: the default IndexOf(string) overload is culture-sensitive.
            while ((findIndex = realText.IndexOf(sensitiveWord, searchFrom, StringComparison.Ordinal)) >= 0)
            {
                if (positions == null)
                {
                    positions = new List<int>();
                    foundSets.Add(sensitiveWord, positions);
                }

                positions.Add(findIndex);

                // Continue after this match so occurrences never overlap.
                searchFrom = findIndex + sensitiveWord.Length;
            }
        }

        return foundSets;
    }

    /// <summary>
    /// Splits raw word-list text into words, one per non-empty line.
    /// </summary>
    /// <param name="content">Raw word-list text.</param>
    /// <returns>The non-empty lines of <paramref name="content"/>.</returns>
    private static string[] SplitWords(string content)
    {
        // Decoding UTF-8 bytes keeps a leading byte-order mark as U+FEFF; drop it so it does not
        // become part of the first word.
        if (content.Length > 0 && content[0] == '\uFEFF')
        {
            content = content.Substring(1);
        }

        return content.Split(LineSeparators, StringSplitOptions.RemoveEmptyEntries);
    }
}
}