RegexExtractor.cs 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. namespace FastReport.Utils
  6. {
  /// <summary>
  /// Accumulates the literal characters found in simple regular expressions.
  /// </summary>
  /// <remarks>
  /// The extractor is a small state machine that walks an expression character by character:
  /// <list type="bullet">
  /// <item>the grouping characters "(" and ")" and the alternation "|" inside a group are dropped;</item>
  /// <item>"*" outside of groups and character classes is dropped;</item>
  /// <item>a character class "[...]" is expanded, e.g. "[a-d]" contributes "abcd";</item>
  /// <item>backslash escapes are interpreted only inside a character class;</item>
  /// <item>every other character is copied to <see cref="Pattern"/> unchanged.</item>
  /// </list>
  /// Parser state is kept between <see cref="AddExpression"/> calls; use <see cref="Clear"/> to start over.
  /// </remarks>
  public class RegexPatternExtractor
  {
      enum State { Raw, Range, EscapeChar, RoundBracket };

      StringBuilder pattern = new StringBuilder();
      State state = State.Raw;

      // Last single character appended inside the current character class;
      // '\0' means "none yet", so a following '-' cannot start a range.
      char prevCharacter = '\0';

      // First character of a range whose end has not been read yet ("a-" seen);
      // '\0' means that no range is pending.
      char startRange = '\0';

      // States to return to when a group, character class or escape sequence ends.
      readonly Stack<State> stateStack = new Stack<State>();

      /// <summary>
      /// Gets all characters collected so far.
      /// </summary>
      public string Pattern { get { return pattern.ToString(); } }

      /// <summary>
      /// Discards the collected characters and resets the parser to its initial state.
      /// </summary>
      public void Clear()
      {
          pattern = new StringBuilder();
          stateStack.Clear();
          state = State.Raw;
          // Without this an unfinished "[a-" would leak into the next expression.
          ResetRange();
      }

      /// <summary>
      /// Parses an expression and appends the characters it contains to <see cref="Pattern"/>.
      /// </summary>
      /// <param name="expression">Expression to parse.</param>
      /// <exception cref="ArgumentNullException"><paramref name="expression"/> is null.</exception>
      /// <exception cref="NotImplementedException">
      /// A character class contains an escape sequence other than
      /// \. \+ \- \( \) \* \[ \] \{ \} \/ \\.
      /// </exception>
      public void AddExpression(string expression)
      {
          if (expression == null)
              throw new ArgumentNullException("expression");

          foreach (char ch in expression)
          {
              switch (state)
              {
                  case State.Raw:
                      RawState(ch);
                      break;
                  case State.RoundBracket:
                      RoundBracketState(ch);
                      break;
                  case State.Range:
                      RangeState(ch);
                      break;
                  case State.EscapeChar:
                      ParseEscapeChar(ch);
                      break;
                  default:
                      throw new InvalidOperationException("Expression extractor unknown state");
              }
          }
      }

      // Handles the character that follows a backslash.
      private void ParseEscapeChar(char ch)
      {
          switch (ch)
          {
              case '.':
              case '+':
              case '-':
              case '(':
              case ')':
              case '*':
              case '[':
              case ']':
              case '{':
              case '}':
              case '/':
              case '\\':
                  state = stateStack.Pop();
                  if (state == State.Range)
                  {
                      // An escaped character is an ordinary class member: it may
                      // close a pending range ("[+-\/]") or start the next one.
                      AppendRangeLiteral(ch);
                  }
                  else
                  {
                      pattern.Append(ch);
                  }
                  break;
              default:
                  throw new NotImplementedException("Not implemented escape sequence");
          }
      }

      // Handles one character inside "[...]".
      private void RangeState(char ch)
      {
          switch (ch)
          {
              case ']':
                  // "[a-]": a '-' that is not followed by an end character is a literal.
                  if (startRange != '\0')
                      pattern.Append('-');
                  // Range bookkeeping must not survive the class it belongs to.
                  ResetRange();
                  state = stateStack.Pop();
                  break;
              case '-':
                  if (startRange == '\0' && prevCharacter != '\0')
                  {
                      // "a-": remember the start and wait for the end character.
                      startRange = prevCharacter;
                  }
                  else
                  {
                      // Leading '-' ("[-a]"), '-' right after a finished range,
                      // or '-' used as the end of a range ("[+--]").
                      AppendRangeLiteral(ch);
                  }
                  break;
              case '\\':
                  stateStack.Push(state);
                  state = State.EscapeChar;
                  break;
              default:
                  AppendRangeLiteral(ch);
                  break;
          }
      }

      // Adds a class member: either a single character or, when a range is
      // pending, every character after the range start up to and including ch.
      private void AppendRangeLiteral(char ch)
      {
          if (startRange == '\0')
          {
              pattern.Append(ch);
              prevCharacter = ch;
              return;
          }

          // The range start itself was already appended when it was read.
          // The counter is an int: a char counter would wrap around at
          // char.MaxValue and the loop would never terminate.
          if (ch > startRange)
          {
              for (int code = startRange + 1; code <= ch; code++)
                  pattern.Append((char)code);
          }
#if IGNORE_ORDER
          else
          {
              for (int code = ch; code <= startRange; code++)
                  pattern.Append((char)code);
          }
#endif
          ResetRange();
      }

      // Forgets the previous character and any pending range start.
      private void ResetRange()
      {
          startRange = '\0';
          prevCharacter = '\0';
      }

      // Handles one character inside "(...)".
      private void RoundBracketState(char ch)
      {
          switch (ch)
          {
              case ')':
                  state = stateStack.Pop();
                  break;
              case '[':
                  stateStack.Push(state);
                  state = State.Range;
                  break;
              case '|':
                  // Do nothing, just continue parsing
                  break;
              default:
                  pattern.Append(ch);
                  break;
          }
      }

      // Handles one character outside of any group or character class.
      private void RawState(char ch)
      {
          switch (ch)
          {
              case '(':
                  stateStack.Push(state);
                  state = State.RoundBracket;
                  break;
              case '[':
                  stateStack.Push(state);
                  state = State.Range;
                  break;
              case '*':
                  // Ignore it in pattern extractor
                  break;
              default:
                  pattern.Append(ch);
                  break;
          }
      }
  }
  148. }