using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Extensions.Caching.Distributed;
using NCC.Dependency;
using NCC.Reflection;
using NCC.Templates.Extensions;
namespace NCC.SensitiveDetection
{
/// <summary>
/// Default <see cref="ISensitiveDetectionProvider"/> implementation. Loads the sensitive-word
/// list from an embedded resource of the entry assembly, stores the raw list in the distributed
/// cache, and detects or masks those words in text.
/// </summary>
[SuppressSniffer]
public class SensitiveDetectionProvider : ISensitiveDetectionProvider
{
    /// <summary>
    /// Distributed cache key under which the raw word list is stored.
    /// </summary>
    private const string DISTRIBUTED_KEY = "SENSITIVE:WORDS";

    /// <summary>
    /// Line separators accepted in the word list. "\r\n" is listed first so a Windows line
    /// ending is consumed as a single separator.
    /// </summary>
    private static readonly string[] WordSeparators = { "\r\n", "\n", "\r" };

    /// <summary>
    /// Distributed cache used to store the word list.
    /// </summary>
    private readonly IDistributedCache _distributedCache;

    /// <summary>
    /// Creates the provider.
    /// </summary>
    /// <param name="distributedCache">Distributed cache used to store the word list.</param>
    public SensitiveDetectionProvider(IDistributedCache distributedCache)
    {
        _distributedCache = distributedCache;
    }

    /// <summary>
    /// Returns all sensitive words, one per line of the word list.
    /// </summary>
    /// <returns>The sensitive words; empty lines are dropped.</returns>
    /// <exception cref="InvalidOperationException">
    /// The embedded word-list resource does not exist in the entry assembly.
    /// </exception>
    public async Task<IEnumerable<string>> GetWordsAsync()
    {
        // Prefer the cached copy of the list.
        var cachedContent = await _distributedCache.GetStringAsync(DISTRIBUTED_KEY);
        if (cachedContent != null)
        {
            return SplitWords(cachedContent);
        }

        var entryAssembly = Reflect.GetEntryAssembly();
        var resourceName = $"{Reflect.GetAssemblyName(entryAssembly)}.sensitive-words.txt";

        string content;
        using (var resourceStream = entryAssembly.GetManifestResourceStream(resourceName))
        {
            // GetManifestResourceStream returns null when the resource is missing; fail with a
            // message that names the resource instead of a bare NullReferenceException.
            if (resourceStream == null)
            {
                throw new InvalidOperationException(
                    $"Embedded resource '{resourceName}' was not found in the entry assembly.");
            }

            // StreamReader reads until the end of the stream (a single Stream.ReadAsync call is
            // allowed to return fewer bytes than requested) and strips a UTF-8 byte-order mark.
            using (var reader = new StreamReader(resourceStream, Encoding.UTF8))
            {
                content = await reader.ReadToEndAsync();
            }
        }

        // Cache the raw content, then split it into words.
        await _distributedCache.SetStringAsync(DISTRIBUTED_KEY, content);
        return SplitWords(content);
    }

    /// <summary>
    /// Checks whether the text is free of sensitive words.
    /// </summary>
    /// <param name="text">Text to check.</param>
    /// <returns>
    /// <c>true</c> when the text is null, empty, whitespace, or contains no sensitive word;
    /// otherwise <c>false</c>.
    /// </returns>
    public async Task<bool> VaildedAsync(string text)
    {
        // Null, empty and whitespace-only text is not checked.
        if (string.IsNullOrWhiteSpace(text))
        {
            return true;
        }

        var foundSets = await FoundSensitiveWordsAsync(text);
        return foundSets.Count == 0;
    }

    /// <summary>
    /// Replaces every character of each sensitive word found in the text.
    /// </summary>
    /// <param name="text">Text to mask.</param>
    /// <param name="transfer">Replacement character.</param>
    /// <returns>
    /// The masked text. Null, empty or whitespace-only input, and text without any sensitive
    /// word, is returned unchanged.
    /// </returns>
    public async Task<string> ReplaceAsync(string text, char transfer = '*')
    {
        // Nothing to mask: hand the input back as-is rather than replacing it with null.
        if (string.IsNullOrWhiteSpace(text))
        {
            return text;
        }

        var foundSets = await FoundSensitiveWordsAsync(text);
        if (foundSets.Count == 0)
        {
            return text;
        }

        var masked = new StringBuilder(text);
        foreach (var found in foundSets)
        {
            var wordLength = found.Key.Length;
            foreach (var start in found.Value)
            {
                // NOTE(review): positions are located in the rendered text (text.Render()) but
                // applied to the original text. If rendering changes the length they can be
                // misaligned; the upper bound is clamped so the write never goes out of range.
                // Confirm the intended behavior for templated input.
                var end = Math.Min(start + wordLength, masked.Length);
                for (var position = start; position < end; position++)
                {
                    masked[position] = transfer;
                }
            }
        }

        return masked.ToString();
    }

    /// <summary>
    /// Finds the sensitive words that occur in the text.
    /// </summary>
    /// <param name="text">Text to search.</param>
    /// <returns>
    /// A map from each sensitive word that occurs in the text to the zero-based start indexes of
    /// its non-overlapping occurrences, in ascending order. Words that do not occur are absent.
    /// </returns>
    private async Task<Dictionary<string, List<int>>> FoundSensitiveWordsAsync(string text)
    {
        // Render the text first (the original comment describes this as supporting
        // configuration rendering).
        var realText = text.Render();

        var sensitiveWords = await GetWordsAsync();
        var foundSets = new Dictionary<string, List<int>>();

        foreach (var sensitiveWord in sensitiveWords)
        {
            // An empty word would never advance the search; a word already processed (listed
            // twice in the word list) must not have its positions recorded again.
            if (string.IsNullOrEmpty(sensitiveWord) || foundSets.ContainsKey(sensitiveWord))
            {
                continue;
            }

            var searchFrom = 0;
            while (true)
            {
                // Ordinal comparison: an exact character match whose length always equals
                // sensitiveWord.Length, independent of the current culture.
                var index = realText.IndexOf(sensitiveWord, searchFrom, StringComparison.Ordinal);
                if (index < 0)
                {
                    break;
                }

                if (!foundSets.TryGetValue(sensitiveWord, out var positions))
                {
                    positions = new List<int>();
                    foundSets.Add(sensitiveWord, positions);
                }

                positions.Add(index);

                // Continue after this occurrence so matches never overlap.
                searchFrom = index + sensitiveWord.Length;
            }
        }

        return foundSets;
    }

    /// <summary>
    /// Splits raw word-list content into words, one per line, dropping empty lines.
    /// </summary>
    /// <param name="content">Raw word-list content.</param>
    /// <returns>The non-empty lines of <paramref name="content"/>.</returns>
    private static string[] SplitWords(string content)
    {
        // A leading byte-order mark (possible in content cached before it was stripped on read)
        // would otherwise become part of the first word.
        return content.TrimStart('\uFEFF').Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);
    }
}
}