RegexExtractor.cs 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. namespace FastReport.Utils
  6. {
  /// <summary>
  /// Collects the characters named by simple regular expressions into one flat string.
  /// </summary>
  /// <remarks>
  /// Expressions are scanned one character at a time by a small state machine:
  /// <list type="bullet">
  /// <item><description>Raw text: every character is copied except <c>(</c> and <c>[</c>
  /// (which open a group / character class) and <c>*</c> (which is skipped).</description></item>
  /// <item><description>Inside <c>(...)</c>: <c>|</c> is skipped, <c>[</c> opens a character class,
  /// <c>)</c> closes the group, everything else is copied.</description></item>
  /// <item><description>Inside <c>[...]</c>: a range such as <c>a-c</c> is expanded to
  /// <c>abc</c>, a backslash starts an escape sequence, other characters are copied.</description></item>
  /// </list>
  /// Backslash escapes are recognised only inside <c>[...]</c>. A <c>-</c> that has no preceding
  /// plain (unescaped) character in the same class, or that is directly followed by <c>]</c>,
  /// contributes nothing to the output. Parser state is kept between calls to
  /// <see cref="AddExpression"/>; call <see cref="Clear"/> to start over.
  /// </remarks>
  public class RegexPatternExtractor
  {
      private enum State { Raw, Range, EscapeChar, RoundBracket }

      private StringBuilder pattern = new StringBuilder();
      private State state = State.Raw;

      // Last plain character appended inside the current [...] class; '\0' means "none yet".
      private char prevCharacter = '\0';

      // Lower bound of a range whose '-' has been seen; '\0' means "no range pending".
      private char startRange = '\0';

      // States to return to when a group, character class or escape sequence ends.
      private readonly Stack<State> stateStack = new Stack<State>();

      /// <summary>
      /// Gets the characters accumulated so far.
      /// </summary>
      public string Pattern { get { return pattern.ToString(); } }

      /// <summary>
      /// Discards the accumulated pattern and returns the parser to its initial state.
      /// </summary>
      public void Clear()
      {
          pattern = new StringBuilder();
          stateStack.Clear();
          state = State.Raw;
          // Range bookkeeping must be cleared too; otherwise a half-parsed "[a-" from
          // before the reset would expand into the first class parsed afterwards.
          ResetRangeTracking();
      }

      /// <summary>
      /// Parses <paramref name="expression"/> and appends the characters it names to <see cref="Pattern"/>.
      /// </summary>
      /// <param name="expression">Expression text to scan; must not be null.</param>
      /// <exception cref="ArgumentNullException"><paramref name="expression"/> is null.</exception>
      /// <exception cref="NotImplementedException">
      /// A backslash inside <c>[...]</c> is followed by a character that is not a supported escape.
      /// </exception>
      public void AddExpression(string expression)
      {
          if (expression == null)
          {
              throw new ArgumentNullException("expression");
          }

          foreach (char ch in expression)
          {
              switch (state)
              {
                  case State.Raw:
                      RawState(ch);
                      break;
                  case State.RoundBracket:
                      RoundBracketState(ch);
                      break;
                  case State.Range:
                      RangeState(ch);
                      break;
                  case State.EscapeChar:
                      ParseEscapeChar(ch);
                      break;
                  default:
                      throw new InvalidOperationException("Expession extractor unknown state");
              }
          }
      }

      // Handles the character that follows a backslash: supported escapes are copied
      // verbatim and the parser returns to the state that was active before the backslash.
      private void ParseEscapeChar(char ch)
      {
          switch (ch)
          {
              case '.':
              case '+':
              case '-':
              case '(':
              case ')':
              case '*':
              case '[':
              case ']':
              case '{':
              case '}':
              case '/':
              case '\\':
                  pattern.Append(ch);
                  state = stateStack.Pop();
                  break;
              default:
                  throw new NotImplementedException("Not implemented escape sequence");
          }
      }

      // Handles one character inside a [...] character class.
      private void RangeState(char ch)
      {
          switch (ch)
          {
              case ']':
                  // Range bookkeeping belongs to this class only; forget it on the way out
                  // so it cannot leak into the next [...] that is parsed.
                  ResetRangeTracking();
                  state = stateStack.Pop();
                  break;
              case '-':
                  startRange = prevCharacter;
                  break;
              case '\\':
                  stateStack.Push(state);
                  state = State.EscapeChar;
                  break;
              default:
                  if (startRange != '\0')
                  {
                      if (ch > startRange)
                      {
                          // The lower bound itself was already appended when it was read.
                          AppendCodeUnits(startRange + 1, ch);
                      }
  #if IGNORE_ORDER
                      else
                      {
                          AppendCodeUnits(ch, startRange);
                      }
  #endif
                      startRange = '\0';
                  }
                  else
                  {
                      pattern.Append(ch);
                      prevCharacter = ch;
                  }
                  break;
          }
      }

      // Handles one character inside a (...) group.
      private void RoundBracketState(char ch)
      {
          switch (ch)
          {
              case ')':
                  state = stateStack.Pop();
                  break;
              case '[':
                  stateStack.Push(state);
                  state = State.Range;
                  break;
              case '|':
                  // Alternation separator: nothing to emit, keep parsing.
                  break;
              default:
                  pattern.Append(ch);
                  break;
          }
      }

      // Handles one character outside of any group or character class.
      private void RawState(char ch)
      {
          switch (ch)
          {
              case '(':
                  stateStack.Push(state);
                  state = State.RoundBracket;
                  break;
              case '[':
                  stateStack.Push(state);
                  state = State.Range;
                  break;
              case '*':
                  // Quantifier: not part of the extracted pattern.
                  break;
              default:
                  pattern.Append(ch);
                  break;
          }
      }

      // Appends every UTF-16 code unit from first to last, inclusive. The counter is an
      // int so that an upper bound of '\uFFFF' ends the loop instead of wrapping to 0.
      private void AppendCodeUnits(int first, int last)
      {
          for (int code = first; code <= last; code++)
          {
              pattern.Append((char)code);
          }
      }

      // Forgets the remembered range start / previous character of a character class.
      private void ResetRangeTracking()
      {
          prevCharacter = '\0';
          startRange = '\0';
      }
  }
  147. }