带权重的词典设计
一、词典类
/// <summary>
/// A list of weighted words that merges duplicate entries by text and is
/// trimmed to a fixed maximum size, keeping the highest-weighted entries.
/// </summary>
public class WeightedDictionary
{
    /// <summary>Maximum number of entries kept in <see cref="Words"/> after an add.</summary>
    private const int MaxWords = 5000;

    /// <summary>
    /// The stored entries. The setter stays public so the list can be
    /// replaced wholesale (e.g. when the object is populated by a serializer).
    /// </summary>
    public List<WeightedWord> Words { get; set; } = new List<WeightedWord>();

    /// <summary>
    /// Adds a word to the dictionary. If an entry with the same
    /// <c>Text</c> already exists, the incoming weight is added to that
    /// entry's weight; otherwise the word is appended as a new entry.
    /// Afterwards the list is trimmed to the <see cref="MaxWords"/>
    /// highest-weighted entries if it has grown past that size.
    /// </summary>
    /// <param name="word">The word to add or merge. Must not be null.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="word"/> is null.
    /// </exception>
    public void AddWord(WeightedWord word)
    {
        // Fail with a descriptive exception instead of a NullReferenceException
        // from the first dereference of word.Text below.
        if (word == null)
        {
            throw new System.ArgumentNullException(nameof(word));
        }

        var existing = Words.FirstOrDefault(w => w.Text == word.Text);
        if (existing != null)
        {
            // Only the weight is merged; the existing entry's other fields are kept as-is.
            existing.Weight += word.Weight;
        }
        else
        {
            Words.Add(word);
        }

        // Enforce the size cap. Note that a freshly added word can be dropped
        // right away if its weight ranks below the cutoff.
        if (Words.Count > MaxWords)
        {
            Words = Words
                .OrderByDescending(w => w.Weight)
                .Take(MaxWords)
                .ToList();
        }
    }
}
/// <summary>
/// A single dictionary entry: a word, its weight, and associated metadata.
/// Each property carries a <c>JsonProperty</c> attribute naming the JSON key
/// it is mapped to.
/// </summary>
public class WeightedWord
{
/// <summary>The word itself; mapped to the JSON key "text".</summary>
[JsonProperty("text")]
public string Text { get; set; }
/// <summary>
/// Mapped to the JSON key "pinyin".
/// NOTE(review): presumably the pinyin romanization of <see cref="Text"/> - confirm.
/// </summary>
[JsonProperty("pinyin")]
public string Pinyin { get; set; }
/// <summary>The entry's weight; defaults to 1.0. Mapped to the JSON key "weight".</summary>
[JsonProperty("weight")]
public float Weight { get; set; } = 1.0f;
/// <summary>
/// List of strings mapped to the JSON key "replacements" (note the key differs
/// from the property name); defaults to an empty list.
/// NOTE(review): presumably common misrecognitions of this word - confirm.
/// </summary>
[JsonProperty("replacements")]
public List<string> CommonErrors { get; set; } = new List<string>();
}
二、智能权重管理
每次识别完成后,识别文本中出现的词权重乘以 1.2,未出现的词权重乘以 0.98 进行衰减;新加入词典的词初始权重为 1。
/// <summary>
/// Adjusts the weights of dictionary entries based on recognized text and
/// removes entries whose weight has fallen below a threshold.
/// </summary>
public class WeightManager
{
    // NOTE(review): _dictionary is referenced below but not declared in this
    // snippet; presumably a field holding the WeightedDictionary - confirm.

    /// <summary>Multiplier applied to the weight of a word found in the recognized text.</summary>
    private const float BoostFactor = 1.2f;

    /// <summary>Multiplier applied to the weight of a word not found in the recognized text.</summary>
    private const float DecayFactor = 0.98f;

    /// <summary>
    /// Rescales every entry's weight: entries whose text occurs in
    /// <paramref name="recognizedText"/> are multiplied by 1.2, all others by 0.98.
    /// </summary>
    /// <param name="recognizedText">The recognized text to scan. Must not be null.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="recognizedText"/> is null.
    /// </exception>
    public void UpdateWeights(string recognizedText)
    {
        if (recognizedText == null)
        {
            throw new System.ArgumentNullException(nameof(recognizedText));
        }

        foreach (var word in _dictionary.Words)
        {
            // string.Contains("") is always true and Contains(null) throws, so an
            // entry with null/empty text is treated as "not used" and decays.
            bool used = !string.IsNullOrEmpty(word.Text)
                && recognizedText.Contains(word.Text);

            // NOTE(review): repeated boosts are unbounded; a float weight will
            // eventually reach Infinity. Consider capping if that matters.
            word.Weight *= used ? BoostFactor : DecayFactor;
        }
    }

    /// <summary>
    /// Removes every entry whose weight is strictly below <paramref name="threshold"/>.
    /// </summary>
    /// <param name="threshold">Minimum weight an entry needs to be kept; defaults to 0.1.</param>
    public void PruneDictionary(float threshold = 0.1f)
    {
        _dictionary.Words.RemoveAll(w => w.Weight < threshold);
    }
}
三、将词典保存到本地
/// <summary>
/// Archives the current "active_dict.json" (if one exists) under
/// "archive/dict_{timestamp}.json", then serializes the dictionary to JSON
/// and writes it to "active_dict.json".
/// </summary>
private void SaveDictionary()
{
    const string activePath = "active_dict.json";
    const string archiveDir = "archive";

    // On the very first save there is nothing to archive; File.Copy would
    // throw FileNotFoundException, so the backup step is skipped.
    if (File.Exists(activePath))
    {
        // File.Copy does not create missing directories. CreateDirectory is a
        // no-op when the directory already exists.
        Directory.CreateDirectory(archiveDir);

        // Invariant culture keeps the file name stable regardless of the
        // machine's calendar/locale settings.
        var timestamp = DateTime.Now.ToString(
            "yyyyMMddHHmmss",
            System.Globalization.CultureInfo.InvariantCulture);

        // overwrite: true so that two saves within the same second do not
        // fail with IOException on the already-existing archive file.
        File.Copy(activePath, $"{archiveDir}/dict_{timestamp}.json", overwrite: true);
    }

    var json = JsonConvert.SerializeObject(_dictionary);
    File.WriteAllText(activePath, json);
}