using PlcSiemens.Core.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;

namespace PlcSiemens.Core.Extension
{
    /// <summary>
    /// Extension and helper methods for <see cref="string"/>: validation, conversion,
    /// truncation, trimming, splitting and fuzzy search.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <c>Argument</c> and <c>DateTime.ToFullString()</c> are declared elsewhere
    /// in the project; their exact exception types / output format are not visible from this file.
    /// </remarks>
    public static class StringHelper
    {
        #region Regular expressions

        // Unanchored: matches any http/https URL occurring anywhere inside the input.
        private static readonly Regex WebUrlExpression = new Regex(
            @"(http|https)://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?",
            RegexOptions.Singleline | RegexOptions.Compiled);

        // Anchored at both ends: the whole input must look like an e-mail address.
        private static readonly Regex EmailExpression = new Regex(
            @"^([0-9a-zA-Z]+[-._+&])*[0-9a-zA-Z]+@([-0-9a-zA-Z]+[.])+[a-zA-Z]{2,6}$",
            RegexOptions.Singleline | RegexOptions.Compiled);

        // Matches a '<' followed by a non-whitespace character, up to the next '>'.
        private static readonly Regex StripHtmlExpression = new Regex(
            "<\\S[^><]*>",
            RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline |
            RegexOptions.CultureInvariant | RegexOptions.Compiled);

        // Characters removed by ToLegalUrl.
        private static readonly char[] IllegalUrlCharacters =
        {
            ';', '/', '\\', '?', ':', '@', '&', '=', '+', '$', ',', '<', '>', '#', '%', '.', '!', '*', '\'', '"',
            '(', ')', '[', ']', '{', '}', '|', '^', '`', '~', '–', '‘', '’', '“', '”', '»', '«'
        };

        #endregion Regular expressions

        #region String checks

        /// <summary>Returns true when the string is null or has zero length.</summary>
        /// <param name="target">String to test; may be null.</param>
        public static bool IsNullOrEmpty(this string target)
        {
            return string.IsNullOrEmpty(target);
        }

        /// <summary>Returns true when the string is null, empty or consists only of white-space characters.</summary>
        /// <param name="value">String to test; may be null.</param>
        public static bool IsNullOrWhiteSpace(this string value)
        {
            return string.IsNullOrWhiteSpace(value);
        }

        /// <summary>
        /// Returns true when the string contains an http/https URL.
        /// The pattern is not anchored, so surrounding text does not prevent a match.
        /// </summary>
        /// <param name="target">String to test; null or empty yields false.</param>
        public static bool IsWebUrl(this string target)
        {
            return !target.IsNullOrEmpty() && WebUrlExpression.IsMatch(target);
        }

        /// <summary>Returns true when the whole string matches the e-mail address pattern.</summary>
        /// <param name="target">String to test; null or empty yields false.</param>
        public static bool IsEmail(this string target)
        {
            return !target.IsNullOrEmpty() && EmailExpression.IsMatch(target);
        }

        #endregion String checks

        #region Format conversion

        /// <summary>
        /// Computes the MD5 digest of the UTF-16 (little-endian) bytes of the string and returns it Base64-encoded.
        /// </summary>
        /// <param name="target">String to hash; validated by <c>Argument.IsNotEmpty</c>.</param>
        /// <returns>Base64 text of the 16-byte MD5 digest.</returns>
        /// <remarks>MD5 is kept so existing hash values stay stable; it is not suitable for security purposes.</remarks>
        public static string Hash(this string target)
        {
            Argument.IsNotEmpty(target, "target");

            using (var md5 = MD5.Create())
            {
                var data = Encoding.Unicode.GetBytes(target);
                var hash = md5.ComputeHash(data);
                return Convert.ToBase64String(hash);
            }
        }

        /// <summary>Encodes the string as bytes, using UTF-8 when no encoding is given.</summary>
        /// <param name="target">String to encode.</param>
        /// <param name="encoding">Encoding to use; null means UTF-8.</param>
        /// <returns>Null for a null input, an empty array for an empty input, otherwise the encoded bytes.</returns>
        public static byte[] GetBytes(this string target, Encoding encoding = null)
        {
            if (target == null) return null;
            if (target == string.Empty) return new byte[0];

            if (encoding == null) encoding = Encoding.UTF8;
            return encoding.GetBytes(target);
        }

        /// <summary>Removes everything that matches the HTML tag pattern from the string.</summary>
        /// <param name="target">Text that may contain markup.</param>
        /// <returns>The text without tags; null or empty input is returned unchanged.</returns>
        public static string StripHtml(this string target)
        {
            // Regex.Replace throws on null input; there is nothing to strip in that case.
            if (string.IsNullOrEmpty(target)) return target;

            return StripHtmlExpression.Replace(target, string.Empty);
        }

        /// <summary>
        /// Converts a 22-character URL-safe Base64 string ('-' for '+', '_' for '/', no padding) into a <see cref="Guid"/>.
        /// </summary>
        /// <param name="target">Encoded text; surrounding white space is ignored.</param>
        /// <returns>The decoded Guid, or <see cref="Guid.Empty"/> when the input is null, has the wrong length or is not valid Base64.</returns>
        public static Guid ToGuid(this string target)
        {
            var result = Guid.Empty;
            if (string.IsNullOrEmpty(target)) return result;

            var trimmed = target.Trim();
            if (trimmed.Length != 22) return result;

            // Undo the URL-safe alphabet and restore the padding removed from the 24-character form.
            var encoded = string.Concat(trimmed.Replace("-", "+").Replace("_", "/"), "==");
            try
            {
                result = new Guid(Convert.FromBase64String(encoded));
            }
            catch (FormatException)
            {
                // Best effort: invalid Base64 falls back to Guid.Empty.
            }

            return result;
        }

        /// <summary>Parses the string as an enum member name or value, ignoring case.</summary>
        /// <typeparam name="T">Enum type to parse into.</typeparam>
        /// <param name="target">Text to parse; surrounding white space is ignored.</param>
        /// <param name="defaultValue">Value returned when the text is null, empty or cannot be parsed.</param>
        /// <returns>The parsed value or <paramref name="defaultValue"/>.</returns>
        public static T ToEnum<T>(this string target, T defaultValue) where T : IComparable, IFormattable
        {
            var convertedValue = defaultValue;
            if (string.IsNullOrEmpty(target)) return convertedValue;

            try
            {
                convertedValue = (T)Enum.Parse(typeof(T), target.Trim(), true);
            }
            catch (ArgumentException)
            {
                // Unknown name, or T is not an enum: keep the default.
            }
            catch (OverflowException)
            {
                // Numeric text outside the enum's underlying range: keep the default.
            }

            return convertedValue;
        }

        /// <summary>
        /// Builds a URL-friendly slug: trims the text, drops every character listed in the illegal-character table,
        /// turns spaces into '-' and collapses runs of '-' into one.
        /// </summary>
        /// <param name="target">Text to convert.</param>
        /// <returns>The slug; null or empty input is returned unchanged.</returns>
        public static string ToLegalUrl(this string target)
        {
            if (string.IsNullOrEmpty(target)) return target;

            target = target.Trim();

            if (target.IndexOfAny(IllegalUrlCharacters) > -1)
            {
                // Single pass instead of one Replace call per illegal character.
                var builder = new StringBuilder(target.Length);
                foreach (var character in target)
                {
                    if (Array.IndexOf(IllegalUrlCharacters, character) < 0) builder.Append(character);
                }

                target = builder.ToString();
            }

            target = target.Replace(" ", "-");

            while (target.Contains("--"))
            {
                target = target.Replace("--", "-");
            }

            return target;
        }

        /// <summary>Placeholder for URL encoding. Currently returns the input unchanged.</summary>
        /// <param name="target">Text to encode.</param>
        public static string UrlEncode(this string target)
        {
            // Not implemented: the HttpUtility.UrlEncode call is disabled, so no encoding is performed.
            return target; // HttpUtility.UrlEncode(target);
        }

        /// <summary>Placeholder for URL decoding. Currently returns the input unchanged.</summary>
        /// <param name="target">Text to decode.</param>
        public static string UrlDecode(this string target)
        {
            // Not implemented: the HttpUtility.UrlDecode call is disabled, so no decoding is performed.
            return target; // HttpUtility.UrlDecode(target);
        }

        /// <summary>Placeholder for minimal HTML attribute encoding. Currently returns the input unchanged.</summary>
        /// <param name="target">Text to encode.</param>
        public static string AttributeEncode(this string target)
        {
            // Not implemented: the HttpUtility.HtmlAttributeEncode call is disabled.
            return target; // HttpUtility.HtmlAttributeEncode(target);
        }

        /// <summary>Placeholder for HTML encoding. Currently returns the input unchanged.</summary>
        /// <param name="target">Text to encode.</param>
        public static string HtmlEncode(this string target)
        {
            // Not implemented: the HttpUtility.HtmlEncode call is disabled.
            return target; // HttpUtility.HtmlEncode(target);
        }

        /// <summary>Placeholder for HTML decoding. Currently returns the input unchanged.</summary>
        /// <param name="target">Text to decode.</param>
        public static string HtmlDecode(this string target)
        {
            // Not implemented: the HttpUtility.HtmlDecode call is disabled.
            return target; // HttpUtility.HtmlDecode(target);
        }

        #endregion Format conversion

        #region Truncation

        /// <summary>
        /// Shortens the string to at most <paramref name="index"/> characters, ending it with "..." when it was cut.
        /// </summary>
        /// <param name="target">Text to shorten; validated by <c>Argument.IsNotEmpty</c>.</param>
        /// <param name="index">Maximum length of the result; validated by <c>Argument.IsNotNegativeOrZero</c>.</param>
        /// <returns>
        /// The original text when it already fits; otherwise the first <c>index - 3</c> characters followed by "...".
        /// When <paramref name="index"/> is smaller than 3 there is no room for the ellipsis and the text is simply cut.
        /// </returns>
        public static string WrapAt(this string target, int index)
        {
            const int dotCount = 3;

            Argument.IsNotEmpty(target, "target");
            Argument.IsNotNegativeOrZero(index, "index");

            if (target.Length <= index) return target;

            // index - dotCount would be negative here and make Substring throw.
            if (index < dotCount) return target.Substring(0, index);

            return string.Concat(target.Substring(0, index - dotCount), new string('.', dotCount));
        }

        /// <summary>Makes sure the string starts with the given prefix (case-insensitive check).</summary>
        /// <param name="target">Text to check.</param>
        /// <param name="start">Required prefix; null or empty leaves the text unchanged.</param>
        /// <returns>The text, prefixed with <paramref name="start"/> when it did not already start with it.</returns>
        public static string EnsureStart(this string target, string start)
        {
            if (start.IsNullOrEmpty()) return target;
            if (target.IsNullOrEmpty()) return start;

            return target.StartsWith(start, StringComparison.OrdinalIgnoreCase) ? target : start + target;
        }

        /// <summary>Makes sure the string ends with the given suffix (case-insensitive check).</summary>
        /// <param name="target">Text to check.</param>
        /// <param name="end">Required suffix; null or empty leaves the text unchanged.</param>
        /// <returns>The text, followed by <paramref name="end"/> when it did not already end with it.</returns>
        public static string EnsureEnd(this string target, string end)
        {
            if (end.IsNullOrEmpty()) return target;
            if (target.IsNullOrEmpty()) return end;

            return target.EndsWith(end, StringComparison.OrdinalIgnoreCase) ? target : target + end;
        }

        /// <summary>
        /// Repeatedly removes any of the given prefixes from the start of the string (case-insensitive)
        /// until none of them matches.
        /// </summary>
        /// <param name="target">Text to trim.</param>
        /// <param name="starts">Prefixes to remove. If the first entry is null or empty the text is returned unchanged.</param>
        /// <returns>The trimmed text.</returns>
        public static string TrimStart(this string target, params string[] starts)
        {
            if (target.IsNullOrEmpty()) return target;
            if (starts == null || starts.Length < 1 || string.IsNullOrEmpty(starts[0])) return target;

            for (var i = 0; i < starts.Length; i++)
            {
                var prefix = starts[i];

                // An empty prefix always "matches" without shortening the text, which would
                // restart the scan forever; a null prefix would throw. Skip both.
                if (string.IsNullOrEmpty(prefix)) continue;
                if (!target.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) continue;

                target = target.Substring(prefix.Length);
                if (string.IsNullOrEmpty(target)) break;

                // Something was removed: scan the prefix list again from the beginning.
                i = -1;
            }

            return target;
        }

        /// <summary>
        /// Repeatedly removes any of the given suffixes from the end of the string (case-insensitive)
        /// until none of them matches.
        /// </summary>
        /// <param name="target">Text to trim.</param>
        /// <param name="ends">Suffixes to remove. If the first entry is null or empty the text is returned unchanged.</param>
        /// <returns>The trimmed text.</returns>
        public static string TrimEnd(this string target, params string[] ends)
        {
            if (target.IsNullOrEmpty()) return target;
            if (ends == null || ends.Length < 1 || string.IsNullOrEmpty(ends[0])) return target;

            for (var i = 0; i < ends.Length; i++)
            {
                var suffix = ends[i];

                // Same reasoning as TrimStart: empty entries would loop forever, null entries would throw.
                if (string.IsNullOrEmpty(suffix)) continue;
                if (!target.EndsWith(suffix, StringComparison.OrdinalIgnoreCase)) continue;

                target = target.Substring(0, target.Length - suffix.Length);
                if (string.IsNullOrEmpty(target)) break;

                // Something was removed: scan the suffix list again from the beginning.
                i = -1;
            }

            return target;
        }

        /// <summary>
        /// Returns the part of the string located after <paramref name="after"/> and before <paramref name="before"/>
        /// (ordinal comparison).
        /// </summary>
        /// <param name="target">Text to search.</param>
        /// <param name="after">Leading marker; null or empty means "from the beginning".</param>
        /// <param name="before">Trailing marker; null or empty means "to the end".</param>
        /// <param name="startIndex">Index at which the search starts.</param>
        /// <param name="positions">
        /// Optional output buffer: element 0 receives the index just past <paramref name="after"/>,
        /// element 1 receives the index of <paramref name="before"/>.
        /// </param>
        /// <returns>
        /// The extracted text; the input itself when it is null/empty or both markers are empty;
        /// null when a requested marker is not found.
        /// </returns>
        public static string Substring(this string target, string after, string before = null, int startIndex = 0, int[] positions = null)
        {
            if (target.IsNullOrEmpty()) return target;
            if (after.IsNullOrEmpty() && before.IsNullOrEmpty()) return target;

            var contentStart = -1;
            if (!string.IsNullOrEmpty(after))
            {
                contentStart = target.IndexOf(after, startIndex, StringComparison.Ordinal);
                if (contentStart < 0) return null;

                contentStart += after.Length;

                // Report where the content begins.
                if (positions != null && positions.Length > 0) positions[0] = contentStart;
            }

            if (string.IsNullOrEmpty(before)) return target.Substring(contentStart);

            var contentEnd = target.IndexOf(before, contentStart >= 0 ? contentStart : startIndex, StringComparison.Ordinal);
            if (contentEnd < 0) return null;

            // Report where the trailing marker begins.
            if (positions != null && positions.Length > 1) positions[1] = contentEnd;

            return contentStart >= 0
                ? target.Substring(contentStart, contentEnd - contentStart)
                : target.Substring(0, contentEnd);
        }

        /// <summary>Cuts the string to a maximum number of characters, optionally ending it with a padding text.</summary>
        /// <param name="str">Text to cut.</param>
        /// <param name="maxLength">Maximum length of the result, padding included. Values &lt;= 0 disable cutting.</param>
        /// <param name="pad">Text appended after the cut, e.g. a few dots; may be null.</param>
        /// <returns>
        /// The original text when it fits; otherwise the leading part plus <paramref name="pad"/>.
        /// When the padding alone does not leave room for any text, the padding is returned.
        /// </returns>
        public static string Cut(this string str, int maxLength, string pad = null)
        {
            // A string of exactly maxLength characters already fits and must not be cut.
            if (str.IsNullOrEmpty() || maxLength <= 0 || str.Length <= maxLength) return str;

            // Number of characters that remain once the padding is accounted for.
            var keep = maxLength;
            if (!pad.IsNullOrEmpty()) keep -= pad.Length;
            if (keep <= 0) return pad;

            return str.Substring(0, keep) + pad;
        }

        /// <summary>
        /// Cuts the string to a maximum number of bytes (measured with <see cref="Encoding.Default"/>),
        /// optionally ending it with a padding text.
        /// </summary>
        /// <param name="str">Text to cut.</param>
        /// <param name="maxLength">Maximum byte length of the result, padding included. Values &lt;= 0 disable cutting.</param>
        /// <param name="pad">Text appended after the cut, e.g. a few dots; may be null.</param>
        /// <param name="strict">
        /// When true, a character that straddles the cut position is dropped so the byte limit is respected;
        /// when false, that character may be kept.
        /// </param>
        /// <returns>The original text when its encoded form fits; otherwise the leading part plus <paramref name="pad"/>.</returns>
        public static string CutBinary(this string str, int maxLength, string pad = null, bool strict = true)
        {
            if (string.IsNullOrEmpty(str) || maxLength <= 0) return str;

            var encoding = Encoding.Default;

            // The limit is in bytes, so the decision has to be made on the encoded length:
            // a short string of multi-byte characters can still exceed the limit.
            var buf = encoding.GetBytes(str);
            if (buf.Length <= maxLength) return str;

            // Number of bytes that remain once the padding is accounted for.
            var len = maxLength;
            if (!string.IsNullOrEmpty(pad)) len -= encoding.GetByteCount(pad);
            if (len <= 0) return pad;

            // Translate the byte budget into a character count without splitting a character.
            int clen;
            while (true)
            {
                try
                {
                    clen = encoding.GetCharCount(buf, 0, len);
                    break;
                }
                catch (DecoderFallbackException)
                {
                    // Only raised by encodings configured with an exception fallback: shrink the window and retry.
                    len--;
                }
            }

            // The character count may still cover more bytes than allowed; shrink it in strict mode.
            if (strict)
            {
                var chars = str.ToCharArray();
                while (encoding.GetByteCount(chars, 0, clen) > len) clen--;
            }

            return str.Substring(0, clen) + pad;
        }

        /// <summary>
        /// For each marker in turn, removes the first occurrence of the marker and everything before it (ordinal comparison).
        /// </summary>
        /// <param name="str">Text to cut.</param>
        /// <param name="starts">Markers to look for. If the first entry is null or empty the text is returned unchanged.</param>
        /// <returns>The remaining text.</returns>
        public static string CutStart(this string str, params string[] starts)
        {
            if (string.IsNullOrEmpty(str)) return str;
            if (starts == null || starts.Length < 1 || string.IsNullOrEmpty(starts[0])) return str;

            foreach (var marker in starts)
            {
                // Null markers would throw and empty ones cannot cut anything.
                if (string.IsNullOrEmpty(marker)) continue;

                var p = str.IndexOf(marker, StringComparison.Ordinal);
                if (p < 0) continue;

                str = str.Substring(p + marker.Length);
                if (string.IsNullOrEmpty(str)) break;
            }

            return str;
        }

        /// <summary>
        /// For each marker in turn, removes the last occurrence of the marker and everything after it (ordinal comparison).
        /// </summary>
        /// <param name="str">Text to cut.</param>
        /// <param name="ends">Markers to look for. If the first entry is null or empty the text is returned unchanged.</param>
        /// <returns>The remaining text.</returns>
        public static string CutEnd(this string str, params string[] ends)
        {
            if (string.IsNullOrEmpty(str)) return str;
            if (ends == null || ends.Length < 1 || string.IsNullOrEmpty(ends[0])) return str;

            foreach (var marker in ends)
            {
                // Null markers would throw; the result of LastIndexOf("") differs between runtimes.
                if (string.IsNullOrEmpty(marker)) continue;

                var p = str.LastIndexOf(marker, StringComparison.Ordinal);
                if (p < 0) continue;

                str = str.Substring(0, p);
                if (string.IsNullOrEmpty(str)) break;
            }

            return str;
        }

        #endregion Truncation

        #region Matching

        /// <summary>Returns true when the string equals any of the candidates, ignoring case.</summary>
        /// <param name="target">Text to compare; null or empty yields false.</param>
        /// <param name="strs">Candidates; a null array yields false.</param>
        public static bool EqualIgnoreCase(this string target, params string[] strs)
        {
            if (target.IsNullOrEmpty() || strs == null) return false;

            return strs.Any(item => string.Equals(target, item, StringComparison.OrdinalIgnoreCase));
        }

        /// <summary>Returns true when the string starts with any of the candidates, ignoring case.</summary>
        /// <param name="target">Text to compare; null or empty yields false.</param>
        /// <param name="strs">Candidate prefixes; a null array yields false and null entries are ignored.</param>
        public static bool StartsWithIgnoreCase(this string target, params string[] strs)
        {
            if (target.IsNullOrEmpty() || strs == null) return false;

            return strs.Any(item => item != null && target.StartsWith(item, StringComparison.OrdinalIgnoreCase));
        }

        /// <summary>Returns true when the string ends with any of the candidates, ignoring case.</summary>
        /// <param name="target">Text to compare; null or empty yields false.</param>
        /// <param name="strs">Candidate suffixes; a null array yields false and null entries are ignored.</param>
        public static bool EndsWithIgnoreCase(this string target, params string[] strs)
        {
            if (target.IsNullOrEmpty() || strs == null) return false;

            return strs.Any(item => item != null && target.EndsWith(item, StringComparison.OrdinalIgnoreCase));
        }

        /// <summary>
        /// Computes a non-negative hash of the string with the recurrence <c>hash = hash * 31 + c</c>, seeded with 23.
        /// </summary>
        /// <param name="s">Text to hash.</param>
        /// <returns>0 for null or empty input, otherwise a value in the range 0..<see cref="int.MaxValue"/>.</returns>
        public static int GetHashcode2(this string s)
        {
            if (string.IsNullOrEmpty(s)) return 0;

            unchecked
            {
                var hash = 23;
                foreach (var c in s)
                {
                    hash = (hash << 5) - hash + c;
                }

                // Math.Abs(int.MinValue) throws even in an unchecked context, so handle it explicitly.
                if (hash == int.MinValue) return int.MaxValue;

                return hash < 0 ? -hash : hash;
            }
        }

        #endregion Matching

        #region Splitting

        /// <summary>Splits the string and drops empty entries. Defaults to ',' and ';' as separators.</summary>
        /// <param name="target">Text to split.</param>
        /// <param name="separators">Separators; when missing, or a single null/empty entry, ',' and ';' are used.</param>
        /// <returns>The non-empty parts (not trimmed); an empty array for null or empty input.</returns>
        public static string[] Split(this string target, params string[] separators)
        {
            if (target.IsNullOrEmpty()) return new string[0];

            if (separators == null || separators.Length < 1 ||
                (separators.Length == 1 && separators[0].IsNullOrEmpty()))
            {
                separators = new[] { ",", ";" };
            }

            return target.Split(separators, StringSplitOptions.RemoveEmptyEntries);
        }

        /// <summary>Splits the string and parses every part as an integer, silently skipping parts that do not parse.</summary>
        /// <param name="target">Text to split.</param>
        /// <param name="separators">Separators; defaults to ',' and ';'.</param>
        /// <returns>The parsed integers in input order; an empty array for null or empty input.</returns>
        public static int[] SplitAsInt(this string target, params string[] separators)
        {
            if (target.IsNullOrEmpty()) return new int[0];
            if (separators == null || separators.Length < 1) separators = new[] { ",", ";" };

            var parts = target.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            var list = new List<int>(parts.Length);
            foreach (var part in parts)
            {
                int id;
                if (int.TryParse(part.Trim(), out id)) list.Add(id);
            }

            return list.ToArray();
        }

        /// <summary>Splits the string into name/value pairs, e.g. "a=1;b=2".</summary>
        /// <param name="target">Text to split.</param>
        /// <param name="nameValueSeparator">Separator between a name and its value; defaults to "=".</param>
        /// <param name="separators">Separators between pairs; defaults to ',' and ';'.</param>
        /// <returns>
        /// A dictionary of trimmed names and values, never null. Pairs without a name or without a value are skipped;
        /// a repeated name keeps the last value.
        /// </returns>
        public static IDictionary<string, string> SplitAsDictionary(this string target, string nameValueSeparator = "=", params string[] separators)
        {
            var dic = new Dictionary<string, string>();
            if (target.IsNullOrWhiteSpace()) return dic;

            if (nameValueSeparator.IsNullOrEmpty()) nameValueSeparator = "=";
            if (separators == null || separators.Length < 1) separators = new[] { ",", ";" };

            var pairs = target.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            foreach (var pair in pairs)
            {
                var p = pair.IndexOf(nameValueSeparator, StringComparison.Ordinal);

                // The separator must have something on both sides; its full length is taken into account.
                if (p <= 0 || p + nameValueSeparator.Length >= pair.Length) continue;

                var key = pair.Substring(0, p).Trim();
                dic[key] = pair.Substring(p + nameValueSeparator.Length).Trim();
            }

            return dic;
        }

        #endregion Splitting

        #region Utilities

        /// <summary>Returns the trimmed string, or an empty string when the input is null.</summary>
        /// <param name="target">Text that may be null.</param>
        public static string NullSafe(this string target)
        {
            return (target ?? string.Empty).Trim();
        }

        /// <summary>
        /// Formats the string with <see cref="string.Format(string, object[])"/>. <see cref="DateTime"/> arguments
        /// referenced by a plain "{n}" placeholder are first converted with <c>ToFullString()</c>.
        /// </summary>
        /// <param name="target">Composite format string; validated by <c>Argument.IsNotEmpty</c>.</param>
        /// <param name="args">Format arguments; the array passed in is not modified.</param>
        /// <returns>The formatted text.</returns>
        public static string FormatWith(this string target, params object[] args)
        {
            Argument.IsNotEmpty(target, "target");

            // Work on a copy so the caller's array is never mutated.
            var formatArgs = args == null ? new object[0] : (object[])args.Clone();

            for (var i = 0; i < formatArgs.Length; i++)
            {
                if (formatArgs[i] is DateTime && target.Contains("{" + i + "}"))
                {
                    formatArgs[i] = ((DateTime)formatArgs[i]).ToFullString();
                }
            }

            return string.Format(target, formatArgs);
        }

        /// <summary>Replaces every occurrence of each of the given values with <paramref name="newValue"/>.</summary>
        /// <param name="target">Text to process.</param>
        /// <param name="oldValues">Values to replace, applied in enumeration order; null or empty entries are ignored.</param>
        /// <param name="newValue">Replacement text.</param>
        /// <returns>The processed text; null or empty input is returned unchanged.</returns>
        public static string Replace(this string target, ICollection<string> oldValues, string newValue)
        {
            if (string.IsNullOrEmpty(target) || oldValues == null) return target;

            foreach (var oldValue in oldValues)
            {
                // string.Replace rejects null and empty search values.
                if (string.IsNullOrEmpty(oldValue)) continue;

                target = target.Replace(oldValue, newValue);
            }

            return target;
        }

        #endregion Utilities

        #region Speech

        /// <summary>Placeholder for speaking the text through a speech engine. Currently does nothing.</summary>
        /// <param name="value">Text to speak.</param>
        public static void Speak(this string value)
        {
            // Disabled: Speecher.Speak(value);
        }

        /// <summary>Placeholder for speaking the text asynchronously through a speech engine. Currently does nothing.</summary>
        /// <param name="value">Text to speak.</param>
        public static void SpeakAsync(this string value)
        {
            // Disabled: Speecher.SpeakAsync(value);
        }

        #endregion Speech

        #region Levenshtein (edit distance)

        /// <summary>
        /// Fuzzy search: keeps the words that approximately contain every keyword of <paramref name="key"/>.
        /// </summary>
        /// <remarks>
        /// The key is split on spaces (ASCII and ideographic). For a keyword of length L the allowed edit distance is
        /// (L - 1) / 2, checked against every substring of the word whose length is between L and L + distance.
        /// Algorithm by @Aimeast: http://www.cnblogs.com/Aimeast/archive/2011/09/05/2167844.html
        /// </remarks>
        /// <param name="key">One or more keywords separated by spaces.</param>
        /// <param name="words">Candidate words; null entries are ignored.</param>
        /// <returns>The matching words in ascending order; an empty array when the key is blank or there are no candidates.</returns>
        public static string[] LevenshteinSearch(string key, string[] words)
        {
            if (IsNullOrWhiteSpace(key) || words == null || words.Length == 0) return new string[0];

            var keywords = key.Split(new[] { ' ', '\u3000' }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var keyword in keywords)
            {
                var current = keyword;
                var maxDistance = (current.Length - 1) / 2;

                // Each keyword narrows the candidate list further.
                words = words
                    .Where(word => word != null && ContainsApproximately(word, current, maxDistance))
                    .OrderBy(word => word)
                    .ToArray();
            }

            return words;
        }

        /// <summary>
        /// Returns true when some substring of <paramref name="word"/> is within <paramref name="maxDistance"/>
        /// edits of <paramref name="keyword"/>.
        /// </summary>
        private static bool ContainsApproximately(string word, string keyword, int maxDistance)
        {
            if (keyword.Length > word.Length) return false;

            for (var extra = 0; extra <= maxDistance; extra++)
            {
                var windowLength = keyword.Length + extra;
                var windowCount = Math.Max(word.Length - windowLength + 1, 0);

                for (var start = 0; start < windowCount; start++)
                {
                    if (LevenshteinDistance(keyword, word.Substring(start, windowLength)) <= maxDistance) return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Computes the Levenshtein (edit) distance: the minimum number of single-character insertions,
        /// deletions and substitutions needed to turn one string into the other.
        /// </summary>
        /// <remarks>Algorithm by @Aimeast: http://www.cnblogs.com/Aimeast/archive/2011/09/05/2167844.html</remarks>
        /// <param name="str1">First string; null is treated as empty.</param>
        /// <param name="str2">Second string; null is treated as empty.</param>
        /// <returns>The edit distance.</returns>
        public static int LevenshteinDistance(string str1, string str2)
        {
            str1 = str1 ?? string.Empty;
            str2 = str2 ?? string.Empty;

            var n = str1.Length;
            var m = str2.Length;

            // cost[i, j] = distance between the first i characters of str1 and the first j characters of str2.
            var cost = new int[n + 1, m + 1];

            for (var i = 0; i <= n; i++) cost[i, 0] = i;
            for (var j = 1; j <= m; j++) cost[0, j] = j;

            for (var i = 0; i < n; i++)
            {
                for (var j = 0; j < m; j++)
                {
                    var deletion = cost[i, j + 1] + 1;
                    var insertion = cost[i + 1, j] + 1;
                    var substitution = str1[i] == str2[j] ? cost[i, j] : cost[i, j] + 1;

                    cost[i + 1, j + 1] = Math.Min(Math.Min(deletion, insertion), substitution);
                }
            }

            return cost[n, m];
        }

        #endregion Levenshtein (edit distance)

        #region LCS (longest common subsequence)

        /// <summary>
        /// Fuzzy search based on the longest common subsequence: keeps the words accepted by
        /// <see cref="LCSDistance"/> for the keywords of <paramref name="key"/>, best matches first.
        /// </summary>
        /// <remarks>Algorithm by @Aimeast: http://www.cnblogs.com/Aimeast/archive/2011/09/05/2167844.html</remarks>
        /// <param name="key">One or more keywords separated by spaces (ASCII or ideographic).</param>
        /// <param name="words">Candidate words; null entries are ignored.</param>
        /// <returns>
        /// The accepted words ordered by (score + 0.5) / word length, then by word;
        /// an empty array when the key is blank or there are no candidates.
        /// </returns>
        public static string[] LCSSearch(string key, string[] words)
        {
            if (IsNullOrWhiteSpace(key) || words == null || words.Length == 0) return new string[0];

            // LCSDistance requires the keywords sorted by ascending length.
            var keys = key
                .Split(new[] { ' ', '\u3000' }, StringSplitOptions.RemoveEmptyEntries)
                .OrderBy(s => s.Length)
                .ToArray();

            var query = from word in words
                        where word != null
                        let score = LCSDistance(word, keys)
                        where score >= 0
                        orderby (score + 0.5) / word.Length, word
                        select word;

            return query.ToArray();
        }

        /// <summary>
        /// Scores how well <paramref name="word"/> covers all keywords using longest common subsequences.
        /// </summary>
        /// <remarks>
        /// The longest common subsequence (LCS) of two sequences is the longest sequence that is a subsequence of both
        /// (a subsequence need not be contiguous, unlike a substring).
        /// Algorithm by @Aimeast: http://www.cnblogs.com/Aimeast/archive/2011/09/05/2167844.html
        /// </remarks>
        /// <param name="word">Candidate word; must not be null.</param>
        /// <param name="keys">Keywords; must contain at least one entry and be sorted by ascending length.</param>
        /// <returns>
        /// -1 when a keyword is rejected (its LCS with the word is at most half the keyword length, or at most half of
        /// the tracked match span); otherwise the number of characters of <paramref name="word"/> that were not part of
        /// any keyword's LCS (lower is better).
        /// </returns>
        public static int LCSDistance(string word, string[] keys)
        {
            var sLength = word.Length;
            var result = sLength;

            // flags[i] is set once word[i] has been matched by some keyword.
            var flags = new bool[sLength];

            // Sized for the longest keyword, which is the last one because of the required ordering.
            // The table is reused across keywords; row 0 and column 0 are never written and stay 0.
            var C = new int[sLength + 1, keys[keys.Length - 1].Length + 1];

            foreach (var key in keys)
            {
                var wLength = key.Length;
                int first = 0, last = 0;
                int i = 0, j = 0, lcsLength;

                // Fill the LCS table for (word, key).
                for (i = 0; i < sLength; i++)
                {
                    for (j = 0; j < wLength; j++)
                    {
                        if (word[i] == key[j])
                        {
                            C[i + 1, j + 1] = C[i, j] + 1;
                            if (first < C[i, j])
                            {
                                last = i;
                                first = C[i, j];
                            }
                        }
                        else
                        {
                            C[i + 1, j + 1] = Math.Max(C[i, j + 1], C[i + 1, j]);
                        }
                    }
                }

                // i and j are deliberately left at their post-loop values: they index the final cell
                // and are the starting point of the backtrack below.
                lcsLength = C[i, j];
                if (lcsLength <= wLength >> 1) return -1;

                // Walk back through the table, marking the characters of word that belong to the LCS.
                while (i > 0 && j > 0)
                {
                    if (C[i - 1, j - 1] + 1 == C[i, j])
                    {
                        i--;
                        j--;
                        if (!flags[i])
                        {
                            flags[i] = true;
                            result--;
                        }

                        first = i;
                    }
                    else if (C[i - 1, j] == C[i, j])
                    {
                        i--;
                    }
                    else // C[i, j - 1] == C[i, j]
                    {
                        j--;
                    }
                }

                // Reject matches whose LCS is too short relative to the span between first and last.
                if (lcsLength <= (last - first + 1) >> 1) return -1;
            }

            return result;
        }

        #endregion LCS (longest common subsequence)

        #region Command line execution

        // Disabled implementation kept for reference; it depends on types (XTrace, HouDa.Runtime)
        // that are not referenced by this file.
        //
        ///// <summary>Runs a command line with a hidden window.</summary>
        ///// <param name="cmd">File name</param>
        ///// <param name="arguments">Command arguments</param>
        ///// <param name="msWait">Milliseconds to wait</param>
        ///// <param name="output">Receives the process output. When null, output goes to the log</param>
        ///// <param name="onExit">Invoked when the process exits</param>
        ///// <returns>Process exit code</returns>
        //public static int Run(this string cmd, string arguments = null, int msWait = 0, Action<string> output = null, Action<Process> onExit = null)
        //{
        //    if (XTrace.Debug) XTrace.WriteLine("Run {0} {1} {2}", cmd, arguments, msWait);
        //
        //    var p = new Process();
        //    var si = p.StartInfo;
        //    si.FileName = cmd;
        //    si.Arguments = arguments;
        //    si.WindowStyle = ProcessWindowStyle.Hidden;
        //
        //    // For console projects the output has to be captured here
        //    if (msWait > 0)
        //    {
        //        si.RedirectStandardOutput = true;
        //        si.RedirectStandardError = true;
        //        si.UseShellExecute = false;
        //        if (output != null)
        //        {
        //            p.OutputDataReceived += (s, e) => output(e.Data);
        //            p.ErrorDataReceived += (s, e) => output(e.Data);
        //        }
        //        else if (HouDa.Runtime.IsConsole)
        //        {
        //            p.OutputDataReceived += (s, e) => XTrace.WriteLine(e.Data);
        //            p.ErrorDataReceived += (s, e) => XTrace.Current.Error(e.Data);
        //        }
        //    }
        //    if (onExit != null) p.Exited += (s, e) => onExit(s as Process);
        //
        //    p.Start();
        //    if (msWait > 0 && (output != null || HouDa.Runtime.IsConsole))
        //    {
        //        p.BeginOutputReadLine();
        //        p.BeginErrorReadLine();
        //    }
        //
        //    if (msWait <= 0) return -1;
        //
        //    // The exit code is not available while the process is still running
        //    if (!p.WaitForExit(msWait)) return -1;
        //
        //    return p.ExitCode;
        //}

        #endregion Command line execution
    }
}