C# 透過 正則表達式(正規表示式) 計算 中英數字的實際寬度(中文算2個字)進行字串切割
C# 透過 正則表達式(正規表示式) 計算 中英數字的實際寬度(中文算2個字)進行字串切割/分割/分段 (子字串)
資料來源: https://blog.typeart.cc/JavaScript%E8%A8%88%E7%AE%97%E5%90%AB%E4%B8%AD%E8%8B%B1%E6%96%87%E5%AD%97%E7%9A%84%E5%AD%97%E4%B8%B2%E9%95%B7%E5%BA%A6/
https://www.cnblogs.com/PatrickChen/archive/2008/10/17/1313250.html
Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console demo that measures and slices strings by display width, where every
    /// character outside U+0000..U+00FF (e.g. CJK ideographs) counts as two columns
    /// and every other character counts as one.
    /// </summary>
    class Program
    {
        // Matches any UTF-16 code unit outside U+0000..U+00FF. Note that this range is
        // Latin-1, not just ASCII. Cached so the pattern is not re-parsed on every call.
        private static readonly Regex WideCharPattern = new Regex("[^\x00-\xff]");

        /// <summary>
        /// Prints the exit prompt and waits for a key press.
        /// </summary>
        private static void pause()
        {
            Console.WriteLine("\nPress any key to terminate...");

            // Console.ReadKey throws InvalidOperationException when stdin is redirected
            // (piped input, CI runs), so only wait when a real console is attached.
            if (!Console.IsInputRedirected)
            {
                Console.ReadKey();
            }
        }

        /// <summary>
        /// Returns the display width of <paramref name="c"/>: 2 for a code unit above
        /// U+00FF, otherwise 1. Mirrors <see cref="WideCharPattern"/>.
        /// </summary>
        private static int CharWidth(char c)
        {
            return c > '\u00FF' ? 2 : 1;
        }

        /// <summary>
        /// Computes the display width of a string, counting every character outside
        /// U+0000..U+00FF as two columns.
        /// </summary>
        /// <param name="str">The text to measure.</param>
        /// <returns>The total width in columns.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        /// <remarks>
        /// Width is counted per UTF-16 code unit, so a character stored as a surrogate
        /// pair contributes 4 columns.
        /// </remarks>
        internal static int Wlen(String str)
        {
            // Each wide character is replaced by two placeholders, so the length of the
            // rewritten string equals the display width.
            return WideCharPattern.Replace(str, "xx").Length;
        }

        /// <summary>
        /// Returns the shortest run of characters beginning at <paramref name="start"/>
        /// whose display width is at least <paramref name="len"/> columns.
        /// </summary>
        /// <param name="data">The source text.</param>
        /// <param name="start">Zero-based character index (not column) where the slice begins.</param>
        /// <param name="len">Requested width in columns.</param>
        /// <returns>
        /// The slice. Because a wide character is never split, the result may be one
        /// column wider than <paramref name="len"/>. When fewer than
        /// <paramref name="len"/> columns remain, the rest of the string is returned.
        /// An empty string is returned when <paramref name="len"/> is not positive or
        /// <paramref name="start"/> lies outside the string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        internal static string Wsubstring(string data, int start, int len)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            // An out-of-range start or a non-positive width yields an empty slice. The
            // previous implementation did the same on most paths but could let an
            // ArgumentOutOfRangeException escape from its catch handler on others.
            if (len <= 0 || start < 0 || start >= data.Length)
            {
                return "";
            }

            // Single pass: accumulate column width until the requested width is reached.
            int width = 0;
            for (int i = start; i < data.Length; i++)
            {
                width += CharWidth(data[i]);
                if (width >= len)
                {
                    return data.Substring(start, i - start + 1);
                }
            }

            // Fewer than len columns remain after start: return everything that is left.
            return data.Substring(start);
        }

        /// <summary>
        /// Prints the sample string, its character count, its display width, and the
        /// slices for start indices 0..5 combined with widths 5..8.
        /// </summary>
        private static void Main(string[] args)
        {
            const string Sample = "jash中英文處理";
            const int LastStart = 5;

            Console.WriteLine(Sample);
            Console.WriteLine(Sample.Length);
            Console.WriteLine(Wlen(Sample));

            for (int start = 0; start <= LastStart; start++)
            {
                for (int len = 5; len <= 8; len++)
                {
                    Console.WriteLine(start + "," + len + "->" + Wsubstring(Sample, start, len));
                }

                // Blank line between groups, but not after the last one.
                if (start < LastStart)
                {
                    Console.WriteLine("");
                }
            }

            pause();

            /* Expected output:
            jash中英文處理
            9
            14
            0,5->jash中
            0,6->jash中
            0,7->jash中英
            0,8->jash中英

            1,5->ash中
            1,6->ash中英
            1,7->ash中英
            1,8->ash中英文

            2,5->sh中英
            2,6->sh中英
            2,7->sh中英文
            2,8->sh中英文

            3,5->h中英
            3,6->h中英文
            3,7->h中英文
            3,8->h中英文處

            4,5->中英文
            4,6->中英文
            4,7->中英文處
            4,8->中英文處

            5,5->英文處
            5,6->英文處
            5,7->英文處理
            5,8->英文處理

            Press any key to terminate...
            */
        }
    }
}
PS.正規表示式/ 正規表達式教學
. 點,匹配任何字符
^ 開始錨,匹配字符串的開頭
$ 結束錨,匹配字符串的結尾
* 星號,匹配零個或多個(貪婪)
+ 加號,匹配一個或多個(貪婪)
? 問號,匹配零個或一個(在 .NET 中為貪婪;非貪婪請寫成 ??)
[abc] 字符類,如果{‘a’,’b’,’c’}中的一個匹配則匹配
[^abc] 反相的類,如果不是{‘a’,’b’,’c’}中的一個,則匹配。注意:此功能當前在某些字符範圍內無效!
[a-zA-Z] 字符範圍,範圍的字符集{a-z | A-Z}
\s 空白字元,\t \f \r \n \v 和空格
\S 非空白
\w 字母數字,[a-zA-Z0-9_]
\W 非字母數字
\d 數字,[0-9]
\D 非數字
. Dot, matches any character
^ Start anchor, matches beginning of string
$ End anchor, matches end of string
* Asterisk, match zero or more (greedy)
+ Plus, match one or more (greedy)
? Question mark, match zero or one (greedy in .NET; write ?? for the non-greedy form)
[abc] Character class, match if one of {‘a’, ‘b’, ‘c’}
[^abc] Inverted class, match if NOT one of {‘a’, ‘b’, ‘c’} NOTE: This feature is currently broken for some usage of character ranges!
[a-zA-Z] Character ranges, the character set of the ranges { a-z | A-Z }
\s Whitespace, \t \f \r \n \v and spaces
\S Non-whitespace
\w Alphanumeric, [a-zA-Z0-9_]
\W Non-alphanumeric
\d Digits, [0-9]
\D Non-digits