首先祝贺北京奥运会精彩开幕!!我亲眼见到了鸟巢的壮观景象,跟一堆外国人在一起看开幕式时我第一次感觉这么自豪!加油奥运,加油中国!
今天把词法分析器写好了,一个巨大的switch。代码如下:
//CNScriptLex.h
1 #ifndef CNSCRIPTLEX_H
2 #define CNSCRIPTLEX_H
3
4 #include "..\..\..\System\System.h"
5
6
7 namespace CNScript_1
8 {
9
// Token categories produced by the CNScript lexer.
// The enum is wrapped in its own namespace so the enumerators are referred
// to as CNScriptLexTokenType::Xxx.
namespace CNScriptLexTokenType
{
    enum ECNScriptLexTokenType
    {
        None = 0,           // value given to a default-constructed token

        // Reserved words.
        Keyword_if,
        Keyword_else,
        Keyword_for,
        Keyword_do,
        Keyword_while,
        Keyword_return,

        // Names and literals.
        Identifier,
        TypeName,
        IntegerValue,
        DoubleValue,

        // Punctuation and trivia.
        Operator,
        Separator,
        Space,
        Comment,

        // Remaining literal kinds.
        BoolValue,
        CharValue,
        StringValue,

        // Brackets: "big" = { }, "mid" = [ ], "small" = ( ).
        BigLeftBracket,
        BigRightBracket,
        MidLeftBracket,
        MidRightBracket,
        SmallLeftBracket,
        SmallRightBracket,

        Error               // emitted when the lexer cannot continue
    };
}
41
42
43 class CNScriptLexToken
44 {
45 public:
46 CNScriptLexToken();
47
48 System::String ToString();
49
50 CNScriptLexTokenType::ECNScriptLexTokenType Type;
51 System::String Value;
52 int LineNumber;
53 int ColumnNumber;
54
55 private:
56 System::String GetTypeString();
57 };
58
59
60 class CNScriptLexParser
61 {
62 public:
63 CNScriptLexParser();
64
65 System::Collections::Generic::List<CNScriptLexToken> Parse(const System::String& scriptCode);
66
67 private:
68 int currInput;
69 int currLine;
70 int currCol;
71 int currState;
72 int eaten;
73 int currTokenStartLine;
74 int currTokenStartCol;
75 int codeLength;
76 System::String code;
77 const wchar_t* pcode;
78 System::Collections::Generic::List<CNScriptLexToken> tokens;
79
80 void advance();
81 void accept(CNScriptLexTokenType::ECNScriptLexTokenType tokenType);
82 void error();
83 System::String tryGetValue();
84 };
85
86 }
87
88 #endif
//CNScriptLex.cpp
1 #include "CNScriptLex.h"
2
3 using namespace System;
4 using namespace System::Collections::Generic;
5
6 namespace CNScript_1
7 {
8 CNScriptLexToken::CNScriptLexToken()
9 {
10 Type = CNScriptLexTokenType::None;
11 LineNumber = 0;
12 ColumnNumber = -1;
13 }
14
15 String CNScriptLexToken::GetTypeString()
16 {
17 switch(Type)
18 {
19 case CNScriptLexTokenType::BigLeftBracket:
20 return L"BLBracket";
21 case CNScriptLexTokenType::BigRightBracket:
22 return L"BRBracket";
23 case CNScriptLexTokenType::BoolValue:
24 return L"BoolValue";
25 case CNScriptLexTokenType::CharValue:
26 return L"CharValue";
27 case CNScriptLexTokenType::Comment:
28 return L"Comment";
29 case CNScriptLexTokenType::Identifier:
30 return L"Identifier";
31 case CNScriptLexTokenType::Keyword_do:
32 return L"Keyword_do";
33 case CNScriptLexTokenType::Keyword_else:
34 return L"Keyword_else";
35 case CNScriptLexTokenType::Keyword_for:
36 return L"Keyword_for";
37 case CNScriptLexTokenType::Keyword_if:
38 return L"Keyword_if";
39 case CNScriptLexTokenType::Keyword_return:
40 return L"Keyword_return";
41 case CNScriptLexTokenType::Keyword_while:
42 return L"Keyword_while";
43 case CNScriptLexTokenType::MidLeftBracket:
44 return L"MLBracket";
45 case CNScriptLexTokenType::MidRightBracket:
46 return L"MRBracket";
47 case CNScriptLexTokenType::None:
48 return L"None";
49 case CNScriptLexTokenType::IntegerValue:
50 return L"IntegerValue";
51 case CNScriptLexTokenType::DoubleValue:
52 return L"DoubleValue";
53 case CNScriptLexTokenType::Operator:
54 return L"Operator";
55 case CNScriptLexTokenType::Separator:
56 return L"Separator";
57 case CNScriptLexTokenType::SmallLeftBracket:
58 return L"SLBracket";
59 case CNScriptLexTokenType::SmallRightBracket:
60 return L"SRBracket";
61 case CNScriptLexTokenType::Space:
62 return L"Space";
63 case CNScriptLexTokenType::StringValue:
64 return L"StringValue";
65 case CNScriptLexTokenType::TypeName:
66 return L"TypeName";
67 case CNScriptLexTokenType::Error:
68 return L"Error";
69 default:
70 return L"None";
71 }
72 }
73
74 String CNScriptLexToken::ToString()
75 {
76 String result = L"[TokenType:" + GetTypeString() + L"\tLine:" + String::ToString(LineNumber) + L"\tColumn:" + String::ToString(ColumnNumber) + L"\tValue:" + Value + L"]";
77 result = result.Replace(L"\r\n", L"\\r\\n");
78
79 return result;
80 }
81
82 CNScriptLexParser::CNScriptLexParser()
83 {
84 }
85
86 void CNScriptLexParser::advance()
87 {
88 wchar_t prevChar = pcode[currInput];
89
90 if(prevChar == L'\n')
91 {
92 currLine ++;
93 currCol = 1;
94 currInput ++;
95 }
96 else
97 {
98 currCol ++;
99 currInput ++;
100 }
101 }
102
103 String CNScriptLexParser::tryGetValue()
104 {
105 int newTokenLength = currInput - eaten;
106
107 return code.SubString(eaten, newTokenLength);
108 }
109
110 void CNScriptLexParser::accept(CNScriptLexTokenType::ECNScriptLexTokenType tokenType)
111 {
112 int newTokenLength = currInput - eaten;
113
114 CNScriptLexToken newToken;
115 newToken.LineNumber = currTokenStartLine;
116 newToken.ColumnNumber = currTokenStartCol;
117 newToken.Type = tokenType;
118 newToken.Value = code.SubString(eaten, newTokenLength);
119
120 tokens.Add(newToken);
121
122 eaten += newTokenLength;
123 currTokenStartLine = currLine;
124 currTokenStartCol = currCol;
125 currState = 0;
126 }
127
128 void CNScriptLexParser::error()
129 {
130 accept(CNScriptLexTokenType::Error);
131 currInput = codeLength + 1;
132 }
133
134 List<CNScriptLexToken> CNScriptLexParser::Parse(const System::String &scriptCode)
135 {
136 currInput = 0;
137 currLine = 1;
138 currCol = 1;
139 currState = 0;
140 eaten = 0;
141 code = scriptCode;
142 pcode = scriptCode.Buffer();
143 codeLength = scriptCode.Length();
144
145 currTokenStartLine = 1;
146 currTokenStartCol = 1;
147
148 tokens.Clear();
149
150 while(currInput <= codeLength)
151 {
152 wchar_t ch = pcode[currInput];
153
154 switch(currState)
155 {
156 case 0:
157 {
158 if(ch == 0)
159 {
160 advance();
161 }
162 else if(ch == L';')
163 {
164 currState = 1;
165 advance();
166 }
167 else if(ch == L'/')
168 {
169 currState = 2;
170 advance();
171 }
172 else if(ch == L' ' || ch == L'\r' || ch == L'\n' || ch == L'\t')
173 {
174 currState = 4;
175 advance();
176 }
177 else if(ch == L'{')
178 {
179 currState = 5;
180 advance();
181 }
182 else if(ch == L'}')
183 {
184 currState = 6;
185 advance();
186 }
187 else if(ch == L'[')
188 {
189 currState = 7;
190 advance();
191 }
192 else if(ch == L']')
193 {
194 currState = 8;
195 advance();
196 }
197 else if(ch == L'(')
198 {
199 currState = 9;
200 advance();
201 }
202 else if(ch == L')')
203 {
204 currState = 10;
205 advance();
206 }
207 else if(ch == L'0')
208 {
209 currState = 11;
210 advance();
211 }
212 else if(ch >= L'1' && ch <= L'9')
213 {
214 currState = 14;
215 advance();
216 }
217 else if(ch == L'\'')
218 {
219 currState = 17;
220 advance();
221 }
222 else if(ch == L'"')
223 {
224 currState = 23;
225 advance();
226 }
227 else if(ch == L'=')
228 {
229 currState = 26;
230 advance();
231 }
232 else if(ch == L'+')
233 {
234 currState = 28;
235 advance();
236 }
237 else if(ch == L'-')
238 {
239 currState = 30;
240 advance();
241 }
242 else if(ch == L'!')
243 {
244 currState = 32;
245 advance();
246 }
247 else if(ch == L'|')
248 {
249 currState = 34;
250 advance();
251 }
252 else if(ch == L'&')
253 {
254 currState = 36;
255 advance();
256 }
257 else if(ch == L'*')
258 {
259 currState = 38;
260 advance();
261 }
262 else if(ch == L'<')
263 {
264 currState = 39;
265 advance();
266 }
267 else if(ch == L'>')
268 {
269 currState = 41;
270 advance();
271 }
272 else if(ch == L'_')
273 {
274 currState = 43;
275 advance();
276 }
277 else if((ch >= L'a' && ch <= L'z') || (ch >= L'A' && ch <= L'Z'))
278 {
279 currState = 44;
280 advance();
281 }
282 else
283 {
284 error();
285 }
286 }
287 break;
288 case 1:
289 {
290 accept(CNScriptLexTokenType::Separator);
291 }
292 break;
293 case 2:
294 {
295 if(ch == L'/')
296 {
297 currState = 3;
298 advance();
299 }
300 else
301 {
302 accept(CNScriptLexTokenType::Operator);
303 }
304 }
305 break;
306 case 3:
307 {
308 if(ch != L'\r' && ch != L'\n' && ch != 0)
309 {
310 advance();
311 }
312 else
313 {
314 accept(CNScriptLexTokenType::Comment);
315 }
316 }
317 break;
318 case 4:
319 {
320 if(ch == L' ' || ch == L'\r' || ch == L'\n' || ch == L'\t')
321 {
322 advance();
323 }
324 else
325 {
326 accept(CNScriptLexTokenType::Space);
327 }
328 }
329 break;
330 case 5:
331 {
332 accept(CNScriptLexTokenType::BigLeftBracket);
333 }
334 break;
335 case 6:
336 {
337 accept(CNScriptLexTokenType::BigRightBracket);
338 }
339 break;
340 case 7:
341 {
342 accept(CNScriptLexTokenType::MidLeftBracket);
343 }
344 break;
345 case 8:
346 {
347 accept(CNScriptLexTokenType::MidRightBracket);
348 }
349 break;
350 case 9:
351 {
352 accept(CNScriptLexTokenType::SmallLeftBracket);
353 }
354 break;
355 case 10:
356 {
357 accept(CNScriptLexTokenType::SmallRightBracket);
358 }
359 break;
360 case 11:
361 {
362 if(ch == L'.')
363 {
364 currState = 12;
365 advance();
366 }
367 else
368 {
369 accept(CNScriptLexTokenType::IntegerValue);
370 }
371 }
372 break;
373 case 12:
374 {
375 if(ch >= L'0' && ch <= L'9')
376 {
377 currState = 13;
378 advance();
379 }
380 else
381 {
382 error();
383 }
384 }
385 break;
386 case 13:
387 {
388 if(ch >= L'0' && ch <= L'9')
389 {
390 advance();
391 }
392 else
393 {
394 accept(CNScriptLexTokenType::DoubleValue);
395 }
396 }
397 break;
398 case 14:
399 {
400 if(ch >= L'0' && ch <= L'9')
401 {
402 advance();
403 }
404 else if(ch == L'.')
405 {
406 currState = 15;
407 advance();
408 }
409 else
410 {
411 accept(CNScriptLexTokenType::IntegerValue);
412 }
413 }
414 break;
415 case 15:
416 {
417 if(ch >= L'0' && ch <= L'9')
418 {
419 currState = 16;
420 advance();
421 }
422 else
423 {
424 error();
425 }
426 }
427 break;
428 case 16:
429 {
430 if(ch >= L'0' && ch <= L'9')
431 {
432 advance();
433 }
434 else
435 {
436 accept(CNScriptLexTokenType::DoubleValue);
437 }
438 }
439 break;
440 case 17:
441 {
442 if(ch == L'\\')
443 {
444 currState = 18;
445 advance();
446 }
447 else if(ch != L'\'' && ch != L'\r' && ch != L'\n' && ch != L'\t' && ch != 0)
448 {
449 currState = 21;
450 advance();
451 }
452 else
453 {
454 error();
455 }
456 }
457 break;
458 case 18:
459 {
460 if(ch != L' ' && ch != L'\r' && ch != L'\n' && ch != L'\t' && ch != 0)
461 {
462 currState = 19;
463 advance();
464 }
465 else
466 {
467 error();
468 }
469 }
470 break;
471 case 19:
472 {
473 if(ch == L'\'')
474 {
475 currState = 20;
476 advance();
477 }
478 else
479 {
480 error();
481 }
482 }
483 break;
484 case 20:
485 {
486 accept(CNScriptLexTokenType::CharValue);
487 }
488 break;
489 case 21:
490 {
491 if(ch == L'\'')
492 {
493 currState = 22;
494 advance();
495 }
496 else
497 {
498 error();
499 }
500 }
501 break;
502 case 22:
503 {
504 accept(CNScriptLexTokenType::CharValue);
505 }
506 break;
507 case 23:
508 {
509 if(ch != L'"' && ch != L'\\' && ch != L'\r' && ch != L'\n' && ch != 0)
510 {
511 advance();
512 }
513 else if(ch == L'\\')
514 {
515 currState = 24;
516 advance();
517 }
518 else if(ch == L'"')
519 {
520 currState = 25;
521 advance();
522 }
523 else
524 {
525 error();
526 }
527 }
528 break;
529 case 24:
530 {
531 if(ch != L' ' && ch != L'\r' && ch != L'\n' && ch != L'\t' && ch != 0)
532 {
533 currState = 23;
534 advance();
535 }
536 else
537 {
538 error();
539 }
540 }
541 break;
542 case 25:
543 {
544 accept(CNScriptLexTokenType::StringValue);
545 }
546 break;
547 case 26:
548 {
549 if(ch == L'=')
550 {
551 currState = 27;
552 advance();
553 }
554 else
555 {
556 accept(CNScriptLexTokenType::Operator);
557 }
558 }
559 break;
560 case 27:
561 {
562 accept(CNScriptLexTokenType::Operator);
563 }
564 break;
565 case 28:
566 {
567 if(ch == L'+')
568 {
569 currState = 29;
570 advance();
571 }
572 else
573 {
574 accept(CNScriptLexTokenType::Operator);
575 }
576 }
577 break;
578 case 29:
579 {
580 accept(CNScriptLexTokenType::Operator);
581 }
582 break;
583 case 30:
584 {
585 if(ch == L'-')
586 {
587 currState = 31;
588 advance();
589 }
590 else
591 {
592 accept(CNScriptLexTokenType::Operator);
593 }
594 }
595 break;
596 case 31:
597 {
598 accept(CNScriptLexTokenType::Operator);
599 }
600 break;
601 case 32:
602 {
603 if(ch == L'=')
604 {
605 currState = 33;
606 advance();
607 }
608 else
609 {
610 accept(CNScriptLexTokenType::Operator);
611 }
612 }
613 break;
614 case 33:
615 {
616 accept(CNScriptLexTokenType::Operator);
617 }
618 break;
619 case 34:
620 {
621 if(ch == L'|')
622 {
623 currState = 35;
624 advance();
625 }
626 else
627 {
628 error();
629 }
630 }
631 break;
632 case 35:
633 {
634 accept(CNScriptLexTokenType::Operator);
635 }
636 break;
637 case 36:
638 {
639 if(ch == L'&')
640 {
641 currState = 37;
642 advance();
643 }
644 else
645 {
646 error();
647 }
648 }
649 break;
650 case 37:
651 {
652 accept(CNScriptLexTokenType::Operator);
653 }
654 break;
655 case 38:
656 {
657 accept(CNScriptLexTokenType::Operator);
658 }
659 break;
660 case 39:
661 {
662 if(ch == L'=')
663 {
664 currState = 40;
665 advance();
666 }
667 else
668 {
669 accept(CNScriptLexTokenType::Operator);
670 }
671 }
672 break;
673 case 40:
674 {
675 accept(CNScriptLexTokenType::Operator);
676 }
677 break;
678 case 41:
679 {
680 if(ch == L'=')
681 {
682 currState = 42;
683 advance();
684 }
685 else
686 {
687 accept(CNScriptLexTokenType::Operator);
688 }
689 }
690 break;
691 case 42:
692 {
693 accept(CNScriptLexTokenType::Operator);
694 }
695 break;
696 case 43:
697 {
698 if(ch == L'_')
699 {
700 advance();
701 }
702 else if((ch >= L'a' && ch <= 'z') || (ch >= L'A' && ch <= L'Z') || (ch >= L'0' && ch <= L'9'))
703 {
704 currState = 44;
705 advance();
706 }
707 else
708 {
709 error();
710 }
711 }
712 break;
713 case 44:
714 {
715 if((ch >= L'a' && ch <= 'z') || (ch >= L'A' && ch <= L'Z') || (ch >= L'0' && ch <= L'9') || ch == L'_')
716 {
717 advance();
718 }
719 else
720 {
721 String tokenValue = tryGetValue();
722 CNScriptLexTokenType::ECNScriptLexTokenType thisTokenType;
723
724 if(tokenValue == L"if")
725 {
726 thisTokenType = CNScriptLexTokenType::Keyword_if;
727 }
728 else if(tokenValue == L"else")
729 {
730 thisTokenType = CNScriptLexTokenType::Keyword_else;
731 }
732 else if(tokenValue == L"for")
733 {
734 thisTokenType = CNScriptLexTokenType::Keyword_for;
735 }
736 else if(tokenValue == L"do")
737 {
738 thisTokenType = CNScriptLexTokenType::Keyword_do;
739 }
740 else if(tokenValue == L"while")
741 {
742 thisTokenType = CNScriptLexTokenType::Keyword_while;
743 }
744 else if(tokenValue == L"return")
745 {
746 thisTokenType = CNScriptLexTokenType::Keyword_return;
747 }
748 else if( tokenValue == L"int" ||
749 tokenValue == L"double" ||
750 tokenValue == L"char" ||
751 tokenValue == L"string" ||
752 tokenValue == L"bool" ||
753 tokenValue == L"void")
754 {
755 thisTokenType = CNScriptLexTokenType::TypeName;
756 }
757 else
758 {
759 thisTokenType = CNScriptLexTokenType::Identifier;
760 }
761
762 accept(thisTokenType);
763 }
764 }
765 break;
766 }
767 }
768
769 return tokens;
770 }
771 }
//Program.cpp
1 #include "..\..\System\System.h"
2 #include "CNScript\CNScript.h"
3
4 using namespace System;
5 using namespace System::IO;
6 using namespace System::Collections::Generic;
7 using namespace CNScript_1;
8
9 int Program(const String& arg)
10 {
11 System::Windows::Forms::Application::RunConsoleApplication();
12
13 CNScriptLexParser parser;
14
15 Console::WriteLine(L"---------CNScript Lexical Parser---------");
16
17 Console::Write(L"\r\nInput the code file name:");
18 String codeFileName = Console::Read();
19
20 StreamReader reader(codeFileName);
21 String code = reader.ReadToEnd();
22 reader.Close();
23
24 Console::WriteLine(L"\r\nTesting code:");
25 Console::WriteLine(code);
26
27 Console::Write(L"\r\nPress [Enter] to start testing\r\n");
28 Console::Read();
29
30 Int64 startTime = GetTickCount();
31 List<CNScriptLexToken> tokens = parser.Parse(code);
32 Int64 endTime = GetTickCount();
33
34 ListIterator<CNScriptLexToken> tokenIter(tokens);
35
36 while(tokenIter.Foreach())
37 {
38 Console::WriteLine(tokenIter.Item.ToString());
39 }
40
41 Console::WriteLine(L"\r\nTime Cost:" + String::ToString(endTime - startTime) + L" ms");
42
43 return 0;
44 }
运行结果: