2007年02月15日 星期四 13:34
这是一个小小的正则表达式解析程序,(虽然是自己的格式,并且效率不高,没有记忆化,且写于C#),请指教: public class MatchHelper { struct LangAtom { string LangConfs; bool positive; bool greed; int least; int most; const int MAXMATCH = 1073741823; public static int construct(string inp, int stp, out LangAtom lang) { lang = new LangAtom(); lang.greed = true; lang.least = lang.most = 1; //Test LangConf should be int ofs = 0; string tmp; switch (inp.Substring(stp + ofs, 1)) { case "[": ofs++; if (inp.Substring(stp + ofs, 1) == "^") lang.positive = false; else { lang.positive = true; ofs--; } while ((tmp = inp.Substring(stp + (++ofs), 1)) != "]") if (tmp == "%") lang.LangConfs += "%" + inp.Substring(stp + (++ofs)); else lang.LangConfs += tmp; break; case "%": lang.LangConfs = "%" + inp.Substring(stp + (++ofs), 1); lang.positive = true; break; case ".": lang.LangConfs = ""; lang.positive = false; break; default: lang.LangConfs = inp.Substring(stp + ofs, 1); lang.positive = true; break; } ofs++; if (inp.Length>stp+ofs) switch (inp.Substring(stp + ofs, 1)) { case "*": lang.most = MAXMATCH; lang.least = 0; ofs++; break; case "+": lang.most = MAXMATCH; ofs++; break; case "-": lang.most = MAXMATCH; lang.least = 0; lang.greed = false; ofs++; break; case "?": lang.least = 0; lang.most = 1; lang.greed = false; ofs++; break; } lang.least--; return ofs; } private bool _MatchAtom(string t) { char tchar=t.ToCharArray()[0]; //Try to match single atom int posi; for (posi = 0; posi < LangConfs.Length; posi++) { string conf = LangConfs.Substring(posi, 1); if (conf == "%") { //Try to parse special config posi++; conf = LangConfs.Substring(posi, 1); switch (conf) { case "a": if (char.IsLetter(tchar)) return true; break; case "c": if (char.IsControl(tchar)) return true; break; case "d": if (char.IsDigit(tchar)) return true; break; case "l": if (char.IsLower(tchar)) return true; break; case "p": if (char.IsPunctuation(tchar)) return true; break; case "s": if (char.IsWhiteSpace(tchar)) return true; break; case "u": if (char.IsUpper(tchar)) return true; break; case "w": if (char.IsLetterOrDigit(tchar)) return true; break; case "x": if (char.IsNumber(tchar) || "abcdefABCDEF".Contains(t)) return true; break; case "z": if (t == "\0") return true; break; default: if (t == conf) return true; break; } } else { if (t == conf) return true; if (LangConfs.Length > posi + 2 && LangConfs.Substring(posi + 1, 1) == "_") { posi += 2; if (tchar > conf.ToCharArray()[0] && tchar <= LangConfs.Substring(posi, 1).ToCharArray()[0]) return true; } } } return false; } public bool MatchAtom(string t) { return _MatchAtom(t) ^ !positive; } /* * If first called, lastlen=-1 * If called again, lastlen=last call returns (length) * If no available match, return -1 */ public int MatchLen(string s, int stp, int lastlen) { int _last = lastlen; if (_last == -2) if (greed) _last = most + 1; else _last = least - 1; //Trust lastlen as Okay if (greed) { if (lastlen != -2) if (lastlen > least) return lastlen - 1; else return -2; _last--; //Try to find a solution from _last to least int i; for (i = 0; i < _last && stp + i < s.Length; i++) if (!MatchAtom(s.Substring(stp + i, 1))) break; i--; if (i < least) return -2; else return i; } else { _last++; //Try to find a solution from _last to most int i; if (lastlen == -2) if (least == -1) return -1; else lastlen = least; if (lastlen < 0) lastlen = 0; for (i = lastlen; i <= most && stp + i < s.Length; i++) if (MatchAtom(s.Substring(stp + i, 1))) { if (i >= _last) return i; } else return -2; return -2; } } } Listatoms; bool TouchHead; bool TouchTail; List > groups; public MatchHelper(string pattern) { atoms = new List (); groups = new List >(); //Parse LangAtoms Pair tmp = new Pair (); int posi = 0; LangAtom tatom; if (pattern.StartsWith("^")) { TouchHead = true; posi = 1; } else TouchHead = false; TouchTail = false; while (posi < pattern.Length) { if (pattern.Substring(posi, 1) == "(") { tmp = new Pair (); tmp.first = atoms.Count; posi++; continue; } if (pattern.Substring(posi, 1) == ")") { tmp.second = atoms.Count; groups.Add(tmp); posi++; continue; } if (pattern.Length == posi + 1 && pattern.Substring(posi, 1) == "$") { TouchTail = true; posi++; continue; } posi += LangAtom.construct(pattern, posi, out tatom); atoms.Add(tatom); } } System.Collections.Generic.Dictionary , bool> tmpMatchable; List posikey; private bool DoSearch(string matchstr, int level) { if (atoms.Count == level) if (!TouchTail || posikey[posikey.Count - 1] == matchstr.Length) return true; else return false; int _prev = -2; int last = posikey[posikey.Count - 1]; if (tmpMatchable.ContainsKey(new Pair (last, level))) return false; for (; ; ) { _prev = atoms[level].MatchLen(matchstr, last, _prev); if (_prev == -2) break; posikey.Add(_prev + last + 1); if (DoSearch(matchstr, level + 1)) return true; posikey.RemoveAt(posikey.Count - 1); } tmpMatchable.Add(new Pair (last, level), false); return false; } private List _DoMatch(string matchstr, int startp) { tmpMatchable = new Dictionary , bool>(); posikey = new List (); if (TouchHead && startp != 0) return null; int i; bool succ = false; if (TouchHead) { posikey.Add(startp); if (!DoSearch(matchstr, 0)) return null; succ = true; } else for (i = startp; i < matchstr.Length; i++) { posikey.Add(i); if (DoSearch(matchstr, 0)) { succ = true; break; } posikey.RemoveAt(posikey.Count - 1); } if (!succ) return null; //Do capture parsing List tmp = new List (); //Add entire string tmp.Add(matchstr.Substring(posikey[0], posikey[posikey.Count - 1] - posikey[0])); //Add block foreach (Pair ip in groups) if (ip.first == ip.second) tmp.Add(posikey[ip.first].ToString()); else tmp.Add(matchstr.Substring(posikey[ip.first], posikey[ip.second] - posikey[ip.first])); return tmp; } public List DoMatch(string matchstr, int startp, ref int lastp) { List tmp; lock (this) { tmp = _DoMatch(matchstr, startp); if (tmp != null) lastp = posikey[posikey.Count - 1]; tmpMatchable.Clear(); tmpMatchable = null; posikey.Clear(); posikey = null; } return tmp; } } public struct Pair { public I first; public J second; public Pair(I first, J second) { this.first = first; this.second = second; } } 在 06-11-29,tocer<tocer.deng在gmail.com> 写道: > 扯远一点:你说的是从理论到实现,不是从理论到实践。差别很大的:) > > Yunfeng Tao wrote:: > > 呵呵,不可耻不可耻。据我所知,没有这样一个天才。 > > > > 正则表达式、有限自动机等等都是理论计算机科学家,其实都是数学家,在回答"什么是计算"这个问题的时候顺带着搞出来的。那时候还没有计算机呢。几十年后有个牛人发现这个玩意工程上也很有用,于是用理论家们的成果实现了一把。这段历史就是这样,从理论到实践,而不是照着马克思的意思从实践到理论。 > > > > 在 06-11-29,刘鑫<march.liu在gmail.com> 写道: > >> 我试过,可耻的失败了……笑…… > >> 从实践中总结出真理的是少数的天才,我不是,我只有在前人的智慧中成长的份儿。 > >> > >> 2006/11/29, Yunfeng Tao <taoyfeng在gmail.com>: > >>> 你要是认为我说的不对,自己写一个看看。所谓"不能说的这么绝对"云云,不足以反驳我。 > >>> > >>> 2006/11/29, rosetta <rosettas在gmail.com>: > >>>> On 11/29/06, Yunfeng Tao <taoyfeng在gmail.com > wrote: > >>>>> 我再举个例子。 > >>>>> > >>>>> > >> 正则表达式大家都知道吧?怎么写一个正则表达式做search的库?我敢打保票,没学过计算理论的有限自动机部分,写对都是不太可能的,更不要说效率了。 > >>>> 不能说的这么绝对,理论也是在实践中发展出来的. > >>>> 能熟练并且高效使用正则表达式工作的人是可能写出高效的符合要求的库的,即使没学过计算理论. > >>>> 只是对于本身就对正则不够精通的情况下,学过理论是有帮助的. > >>>> > >>>>> 2006/11/29, shhgs <shhgs.efhilt在gmail.com>: > >>>>>> 看来大家对数学的看法不同。 > >>>>>> > >>>>>> > >> 我认为很多东西都属于数学的范畴。数学不仅仅是微积分,行列式。很多算法的设计思路,问题的分析思路都是数学。不要觉得一个问题用到的数学知识很少就认为它不是数学。数学首先是一种思路。 > >>>>>> 下面我举两个例子。 > >>>>>> > >>>>>> 第一是GC。堆里有很多对象,栈里有很多引用。请问你怎样判断堆里的对象是不是垃圾?(copy and > >> delete不算) > >>>>>> > >> 第二,内存检测程序。不要以为这个很简单。因为你程序运行的时候是要占内存的,但是你又怎样判断程序占用的这段内存是不是有问题。这就像是站在桌子上用棍子探哪里凹下去一样。问题是你怎么知道你站的地方是不是凹下去了。 > >>>>>> 这里没有微积分,没有行列式,甚至没有加减乘除,但是这难道不是数学吗? > >>>>>> > >>>>>> On 11/28/06, fdu.xiaojf在gmail.com < fdu.xiaojf在gmail.com> wrote: > >>>>>>> shhgs wrote: > >>>>>>>> 就差半步。 > >>>>>>>> > >>>>>>>> 其实 > >>>>>>>> > >>>>>>>> f(n) = 1/2 * f(n-1) + 1/2 f(n+1) > >>>>>>>> > >>>>>>>> 请问,这个f(n)是什么?是不是一个等差数列? > >>>>>>>> > >>>>>>>> 下面还要再说吗? > >>>>>>>> > >>>>>>> 唉,我白痴了,后面白费那么多力气,怎么一眼没看出来呢,呵呵 > >>>>>>> > >>>>>>> 谢谢! > >>>>>>> _______________________________________________ > >>>>>>> python-chinese > >>>>>>> Post: send python-chinese在lists.python.cn > >>>>>>> Subscribe: send subscribe to > >> python-chinese-request在lists.python.cn > >>>>>>> Unsubscribe: send unsubscribe to > >> python-chinese-request在lists.python.cn > >>>>>>> Detail Info: > >> http://python.cn/mailman/listinfo/python-chinese > >>>>>> _______________________________________________ > >>>>>> python-chinese > >>>>>> Post: send python-chinese在lists.python.cn > >>>>>> Subscribe: send subscribe to > >> python-chinese-request在lists.python.cn > >>>>>> Unsubscribe: send unsubscribe to > >> python-chinese-request在lists.python.cn > >>>>>> Detail Info: > >> http://python.cn/mailman/listinfo/python-chinese > >>>>> _______________________________________________ > >>>>> python-chinese > >>>>> Post: send python-chinese在lists.python.cn > >>>>> Subscribe: send subscribe to > >> python-chinese-request在lists.python.cn > >>>>> Unsubscribe: send unsubscribe to > >> python-chinese-request在lists.python.cn > >>>>> Detail Info: > >> http://python.cn/mailman/listinfo/python-chinese > >>>> > >>>> -- > >>>> with kind regards > >>>> _______________________________________________ > >>>> python-chinese > >>>> Post: send python-chinese在lists.python.cn > >>>> Subscribe: send subscribe to > >> python-chinese-request在lists.python.cn > >>>> Unsubscribe: send unsubscribe to > >> python-chinese-request在lists.python.cn > >>>> Detail Info: > >> http://python.cn/mailman/listinfo/python-chinese > >>> _______________________________________________ > >>> python-chinese > >>> Post: send python-chinese在lists.python.cn > >>> Subscribe: send subscribe to > >> python-chinese-request在lists.python.cn > >>> Unsubscribe: send unsubscribe to > >> python-chinese-request在lists.python.cn > >>> Detail Info: > >> http://python.cn/mailman/listinfo/python-chinese > >> > >> > >> > >> -- > >> 欢迎访问: > >> http://blog.csdn.net/ccat > >> > >> 刘鑫 > >> March.Liu > >> > >> _______________________________________________ > >> python-chinese > >> Post: send python-chinese在lists.python.cn > >> Subscribe: send subscribe to > >> python-chinese-request在lists.python.cn > >> Unsubscribe: send unsubscribe to > >> python-chinese-request在lists.python.cn > >> Detail Info: > >> http://python.cn/mailman/listinfo/python-chinese > >> > > _______________________________________________ > > python-chinese > > Post: send python-chinese在lists.python.cn > > Subscribe: send subscribe to python-chinese-request在lists.python.cn > > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > > Detail Info: http://python.cn/mailman/listinfo/python-chinese > > -- > Vim 中文 Google 论坛 http://groups.google.com/group/Vim-cn > _______________________________________________ > python-chinese > Post: send python-chinese在lists.python.cn > Subscribe: send subscribe to python-chinese-request在lists.python.cn > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > Detail Info: http://python.cn/mailman/listinfo/python-chinese
2007年02月16日 星期五 10:51
稍稍看了一下,没看明白。请问哪里实现了star操作,哪里实现了complement操作? 在 07-2-15,BloodMage<zhouyisu在gmail.com> 写道: > 这是一个小小的正则表达式解析程序,(虽然是自己的格式,并且效率不高,没有记忆化,且写于C#),请指教: > > public class MatchHelper > > { > > struct LangAtom > > { > > string LangConfs; > > bool positive; > > bool greed; > > int least; > > int most; > > const int MAXMATCH = 1073741823; > > public static int construct(string inp, int stp, out LangAtom lang) > > { > > lang = new LangAtom(); > > lang.greed = true; > > lang.least = lang.most = 1; > > //Test LangConf should be > > int ofs = 0; > > string tmp; > > switch (inp.Substring(stp + ofs, 1)) > > { > > case "[": > > ofs++; > > if (inp.Substring(stp + ofs, 1) == "^") > > lang.positive = false; > > else > > { > > lang.positive = true; > > ofs--; > > } > > while ((tmp = inp.Substring(stp + (++ofs), 1)) != "]") > > if (tmp == "%") > > lang.LangConfs += "%" + > inp.Substring(stp + (++ofs)); > > else > > lang.LangConfs += tmp; > > break; > > case "%": > > lang.LangConfs = "%" + inp.Substring(stp + (++ofs), 1); > > lang.positive = true; > > break; > > case ".": > > lang.LangConfs = ""; > > lang.positive = false; > > break; > > default: > > lang.LangConfs = inp.Substring(stp + ofs, 1); > > lang.positive = true; > > break; > > } > > ofs++; > > if (inp.Length>stp+ofs) > > switch (inp.Substring(stp + ofs, 1)) > > { > > case "*": > > lang.most = MAXMATCH; > > lang.least = 0; > > ofs++; > > break; > > case "+": > > lang.most = MAXMATCH; > > ofs++; > > break; > > case "-": > > lang.most = MAXMATCH; > > lang.least = 0; > > lang.greed = false; > > ofs++; > > break; > > case "?": > > lang.least = 0; > > lang.most = 1; > > lang.greed = false; > > ofs++; > > break; > > } > > lang.least--; > > return ofs; > > } > > > > private bool _MatchAtom(string t) > > { > > char tchar=t.ToCharArray()[0]; > > //Try to match single atom > > int posi; > > for (posi = 0; posi < LangConfs.Length; posi++) > > { > > string conf = LangConfs.Substring(posi, 1); > > if (conf == "%") > > { > > //Try to parse special config > > posi++; > > conf = LangConfs.Substring(posi, 1); > > switch (conf) > > { > > case "a": > > if (char.IsLetter(tchar)) > > return true; > > break; > > case "c": > > if (char.IsControl(tchar)) > > return true; > > break; > > case "d": > > if (char.IsDigit(tchar)) > > return true; > > break; > > case "l": > > if (char.IsLower(tchar)) > > return true; > > break; > > case "p": > > if (char.IsPunctuation(tchar)) > > return true; > > break; > > case "s": > > if (char.IsWhiteSpace(tchar)) > > return true; > > break; > > case "u": > > if (char.IsUpper(tchar)) > > return true; > > break; > > case "w": > > if (char.IsLetterOrDigit(tchar)) > > return true; > > break; > > case "x": > > if (char.IsNumber(tchar) || > "abcdefABCDEF".Contains(t)) > > return true; > > break; > > case "z": > > if (t == "\0") > > return true; > > break; > > default: > > if (t == conf) > > return true; > > break; > > } > > > > } > > else > > { > > if (t == conf) > > return true; > > if (LangConfs.Length > posi + 2 && > LangConfs.Substring(posi + 1, 1) == "_") > > { > > posi += 2; > > if (tchar > conf.ToCharArray()[0] && tchar > <= LangConfs.Substring(posi, 1).ToCharArray()[0]) > > return true; > > } > > } > > } > > return false; > > } > > > > public bool MatchAtom(string t) > > { > > return _MatchAtom(t) ^ !positive; > > } > > /* > > * If first called, lastlen=-1 > > * If called again, lastlen=last call returns (length) > > * If no available match, return -1 > > */ > > public int MatchLen(string s, int stp, int lastlen) > > { > > int _last = lastlen; > > if (_last == -2) > > if (greed) > > _last = most + 1; > > else > > _last = least - 1; > > > > //Trust lastlen as Okay > > > > if (greed) > > { > > if (lastlen != -2) > > if (lastlen > least) > > return lastlen - 1; > > else > > return -2; > > _last--; > > //Try to find a solution from _last to least > > int i; > > for (i = 0; i < _last && stp + i < s.Length; i++) > > if (!MatchAtom(s.Substring(stp + i, 1))) > > break; > > i--; > > if (i < least) > > return -2; > > else > > return i; > > } > > else > > { > > _last++; > > //Try to find a solution from _last to most > > int i; > > if (lastlen == -2) > > if (least == -1) > > return -1; > > else > > lastlen = least; > > if (lastlen < 0) lastlen = 0; > > for (i = lastlen; i <= most && stp + i < s.Length; i++) > > if (MatchAtom(s.Substring(stp + i, 1))) > > { > > if (i >= _last) > > return i; > > } > > else > > return -2; > > return -2; > > > > } > > } > > } > > > > Listatoms; > > > > bool TouchHead; > > bool TouchTail; > > > > List> > > > public MatchHelper(string pattern) > > { > > atoms = new List > groups; (); > > groups = new List> > > > //Parse LangAtoms > > Pair >(); tmp = new Pair (); > > int posi = 0; > > > > LangAtom tatom; > > > > if (pattern.StartsWith("^")) > > { > > TouchHead = true; > > posi = 1; > > } > > else > > TouchHead = false; > > TouchTail = false; > > while (posi < pattern.Length) > > { > > if (pattern.Substring(posi, 1) == "(") > > { > > tmp = new Pair(); > > tmp.first = atoms.Count; > > posi++; > > continue; > > } > > if (pattern.Substring(posi, 1) == ")") > > { > > tmp.second = atoms.Count; > > groups.Add(tmp); > > posi++; > > continue; > > } > > if (pattern.Length == posi + 1 && > pattern.Substring(posi, 1) == "$") > > { > > TouchTail = true; > > posi++; > > continue; > > } > > > > posi += LangAtom.construct(pattern, posi, out tatom); > > atoms.Add(tatom); > > } > > } > > > > System.Collections.Generic.Dictionary> tmpMatchable; > > List , bool> posikey; > > > > private bool DoSearch(string matchstr, int level) > > { > > if (atoms.Count == level) > > if (!TouchTail || posikey[posikey.Count - 1] == matchstr.Length) > > return true; > > else > > return false; > > int _prev = -2; > > int last = posikey[posikey.Count - 1]; > > > > if (tmpMatchable.ContainsKey(new Pair(last, level))) > > return false; > > > > for (; ; ) > > { > > _prev = atoms[level].MatchLen(matchstr, last, _prev); > > if (_prev == -2) break; > > > > posikey.Add(_prev + last + 1); > > if (DoSearch(matchstr, level + 1)) > > return true; > > posikey.RemoveAt(posikey.Count - 1); > > } > > > > tmpMatchable.Add(new Pair(last, level), false); > > return false; > > } > > > > private List_DoMatch(string matchstr, int startp) > > { > > tmpMatchable = new Dictionary> > posikey = new List , bool>(); (); > > if (TouchHead && startp != 0) > > return null; > > int i; > > bool succ = false; > > if (TouchHead) > > { > > posikey.Add(startp); > > if (!DoSearch(matchstr, 0)) > > return null; > > succ = true; > > } > > else > > for (i = startp; i < matchstr.Length; i++) > > { > > posikey.Add(i); > > if (DoSearch(matchstr, 0)) > > { > > succ = true; > > break; > > } > > posikey.RemoveAt(posikey.Count - 1); > > } > > if (!succ) > > return null; > > > > //Do capture parsing > > Listtmp = new List (); > > > > //Add entire string > > tmp.Add(matchstr.Substring(posikey[0], > posikey[posikey.Count - 1] - posikey[0])); > > > > //Add block > > foreach (Pairip in groups) > > if (ip.first == ip.second) > > tmp.Add(posikey[ip.first].ToString()); > > else > > tmp.Add(matchstr.Substring(posikey[ip.first], > posikey[ip.second] - posikey[ip.first])); > > > > return tmp; > > } > > > > public ListDoMatch(string matchstr, int startp, ref int lastp) > > { > > Listtmp; > > lock (this) > > { > > tmp = _DoMatch(matchstr, startp); > > if (tmp != null) lastp = posikey[posikey.Count - 1]; > > tmpMatchable.Clear(); > > tmpMatchable = null; > > posikey.Clear(); > > posikey = null; > > } > > return tmp; > > } > > } > > > > public struct Pair > > { > > public I first; > > public J second; > > public Pair(I first, J second) > > { > > this.first = first; > > this.second = second; > > } > > } > > 在 06-11-29,tocer<tocer.deng在gmail.com> 写道: > > 扯远一点:你说的是从理论到实现,不是从理论到实践。差别很大的:) > > > > Yunfeng Tao wrote:: > > > 呵呵,不可耻不可耻。据我所知,没有这样一个天才。 > > > > > > 正则表达式、有限自动机等等都是理论计算机科学家,其实都是数学家,在回答"什么是计算"这个问题的时候顺带着搞出来的。那时候还没有计算机呢。几十年后有个牛人发现这个玩意工程上也很有用,于是用理论家们的成果实现了一把。这段历史就是这样,从理论到实践,而不是照着马克思的意思从实践到理论。 > > > > > > 在 06-11-29,刘鑫<march.liu在gmail.com> 写道: > > >> 我试过,可耻的失败了……笑…… > > >> 从实践中总结出真理的是少数的天才,我不是,我只有在前人的智慧中成长的份儿。 > > >> > > >> 2006/11/29, Yunfeng Tao <taoyfeng在gmail.com>: > > >>> 你要是认为我说的不对,自己写一个看看。所谓"不能说的这么绝对"云云,不足以反驳我。 > > >>> > > >>> 2006/11/29, rosetta <rosettas在gmail.com>: > > >>>> On 11/29/06, Yunfeng Tao <taoyfeng在gmail.com > wrote: > > >>>>> 我再举个例子。 > > >>>>> > > >>>>> > > >> 正则表达式大家都知道吧?怎么写一个正则表达式做search的库?我敢打保票,没学过计算理论的有限自动机部分,写对都是不太可能的,更不要说效率了。 > > >>>> 不能说的这么绝对,理论也是在实践中发展出来的. > > >>>> 能熟练并且高效使用正则表达式工作的人是可能写出高效的符合要求的库的,即使没学过计算理论. > > >>>> 只是对于本身就对正则不够精通的情况下,学过理论是有帮助的. > > >>>> > > >>>>> 2006/11/29, shhgs <shhgs.efhilt在gmail.com>: > > >>>>>> 看来大家对数学的看法不同。 > > >>>>>> > > >>>>>> > > >> 我认为很多东西都属于数学的范畴。数学不仅仅是微积分,行列式。很多算法的设计思路,问题的分析思路都是数学。不要觉得一个问题用到的数学知识很少就认为它不是数学。数学首先是一种思路。 > > >>>>>> 下面我举两个例子。 > > >>>>>> > > >>>>>> 第一是GC。堆里有很多对象,栈里有很多引用。请问你怎样判断堆里的对象是不是垃圾?(copy and > > >> delete不算) > > >>>>>> > > >> 第二,内存检测程序。不要以为这个很简单。因为你程序运行的时候是要占内存的,但是你又怎样判断程序占用的这段内存是不是有问题。这就像是站在桌子上用棍子探哪里凹下去一样。问题是你怎么知道你站的地方是不是凹下去了。 > > >>>>>> 这里没有微积分,没有行列式,甚至没有加减乘除,但是这难道不是数学吗? > > >>>>>> > > >>>>>> On 11/28/06, fdu.xiaojf在gmail.com < fdu.xiaojf在gmail.com> wrote: > > >>>>>>> shhgs wrote: > > >>>>>>>> 就差半步。 > > >>>>>>>> > > >>>>>>>> 其实 > > >>>>>>>> > > >>>>>>>> f(n) = 1/2 * f(n-1) + 1/2 f(n+1) > > >>>>>>>> > > >>>>>>>> 请问,这个f(n)是什么?是不是一个等差数列? > > >>>>>>>> > > >>>>>>>> 下面还要再说吗? > > >>>>>>>> > > >>>>>>> 唉,我白痴了,后面白费那么多力气,怎么一眼没看出来呢,呵呵 > > >>>>>>> > > >>>>>>> 谢谢! > > >>>>>>> _______________________________________________ > > >>>>>>> python-chinese > > >>>>>>> Post: send python-chinese在lists.python.cn > > >>>>>>> Subscribe: send subscribe to > > >> python-chinese-request在lists.python.cn > > >>>>>>> Unsubscribe: send unsubscribe to > > >> python-chinese-request在lists.python.cn > > >>>>>>> Detail Info: > > >> http://python.cn/mailman/listinfo/python-chinese > > >>>>>> _______________________________________________ > > >>>>>> python-chinese > > >>>>>> Post: send python-chinese在lists.python.cn > > >>>>>> Subscribe: send subscribe to > > >> python-chinese-request在lists.python.cn > > >>>>>> Unsubscribe: send unsubscribe to > > >> python-chinese-request在lists.python.cn > > >>>>>> Detail Info: > > >> http://python.cn/mailman/listinfo/python-chinese > > >>>>> _______________________________________________ > > >>>>> python-chinese > > >>>>> Post: send python-chinese在lists.python.cn > > >>>>> Subscribe: send subscribe to > > >> python-chinese-request在lists.python.cn > > >>>>> Unsubscribe: send unsubscribe to > > >> python-chinese-request在lists.python.cn > > >>>>> Detail Info: > > >> http://python.cn/mailman/listinfo/python-chinese > > >>>> > > >>>> -- > > >>>> with kind regards > > >>>> _______________________________________________ > > >>>> python-chinese > > >>>> Post: send python-chinese在lists.python.cn > > >>>> Subscribe: send subscribe to > > >> python-chinese-request在lists.python.cn > > >>>> Unsubscribe: send unsubscribe to > > >> python-chinese-request在lists.python.cn > > >>>> Detail Info: > > >> http://python.cn/mailman/listinfo/python-chinese > > >>> _______________________________________________ > > >>> python-chinese > > >>> Post: send python-chinese在lists.python.cn > > >>> Subscribe: send subscribe to > > >> python-chinese-request在lists.python.cn > > >>> Unsubscribe: send unsubscribe to > > >> python-chinese-request在lists.python.cn > > >>> Detail Info: > > >> http://python.cn/mailman/listinfo/python-chinese > > >> > > >> > > >> > > >> -- > > >> 欢迎访问: > > >> http://blog.csdn.net/ccat > > >> > > >> 刘鑫 > > >> March.Liu > > >> > > >> _______________________________________________ > > >> python-chinese > > >> Post: send python-chinese在lists.python.cn > > >> Subscribe: send subscribe to > > >> python-chinese-request在lists.python.cn > > >> Unsubscribe: send unsubscribe to > > >> python-chinese-request在lists.python.cn > > >> Detail Info: > > >> http://python.cn/mailman/listinfo/python-chinese > > >> > > > _______________________________________________ > > > python-chinese > > > Post: send python-chinese在lists.python.cn > > > Subscribe: send subscribe to python-chinese-request在lists.python.cn > > > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > > > Detail Info: http://python.cn/mailman/listinfo/python-chinese > > > > -- > > Vim 中文 Google 论坛 http://groups.google.com/group/Vim-cn > > _______________________________________________ > > python-chinese > > Post: send python-chinese在lists.python.cn > > Subscribe: send subscribe to python-chinese-request在lists.python.cn > > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > > Detail Info: http://python.cn/mailman/listinfo/python-chinese > _______________________________________________ > python-chinese > Post: send python-chinese在lists.python.cn > Subscribe: send subscribe to python-chinese-request在lists.python.cn > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > Detail Info: http://python.cn/mailman/listinfo/python-chinese
2007年02月17日 星期六 23:52
小弟不才请问什么是star操作和complement操作? 我的正则表达式功能很简单,只有group,元素组[0123456789],匹配长度+-?,全匹配符. 在 07-2-16,Yunfeng Tao<taoyfeng在gmail.com> 写道: > 稍稍看了一下,没看明白。请问哪里实现了star操作,哪里实现了complement操作? > > 在 07-2-15,BloodMage<zhouyisu在gmail.com> 写道: > > 这是一个小小的正则表达式解析程序,(虽然是自己的格式,并且效率不高,没有记忆化,且写于C#),请指教: > > > > public class MatchHelper > > > > { > > > > struct LangAtom > > > > { > > > > string LangConfs; > > > > bool positive; > > > > bool greed; > > > > int least; > > > > int most; > > > > const int MAXMATCH = 1073741823; > > > > public static int construct(string inp, int stp, out LangAtom lang) > > > > { > > > > lang = new LangAtom(); > > > > lang.greed = true; > > > > lang.least = lang.most = 1; > > > > //Test LangConf should be > > > > int ofs = 0; > > > > string tmp; > > > > switch (inp.Substring(stp + ofs, 1)) > > > > { > > > > case "[": > > > > ofs++; > > > > if (inp.Substring(stp + ofs, 1) == "^") > > > > lang.positive = false; > > > > else > > > > { > > > > lang.positive = true; > > > > ofs--; > > > > } > > > > while ((tmp = inp.Substring(stp + (++ofs), 1)) != "]") > > > > if (tmp == "%") > > > > lang.LangConfs += "%" + > > inp.Substring(stp + (++ofs)); > > > > else > > > > lang.LangConfs += tmp; > > > > break; > > > > case "%": > > > > lang.LangConfs = "%" + inp.Substring(stp + (++ofs), 1); > > > > lang.positive = true; > > > > break; > > > > case ".": > > > > lang.LangConfs = ""; > > > > lang.positive = false; > > > > break; > > > > default: > > > > lang.LangConfs = inp.Substring(stp + ofs, 1); > > > > lang.positive = true; > > > > break; > > > > } > > > > ofs++; > > > > if (inp.Length>stp+ofs) > > > > switch (inp.Substring(stp + ofs, 1)) > > > > { > > > > case "*": > > > > lang.most = MAXMATCH; > > > > lang.least = 0; > > > > ofs++; > > > > break; > > > > case "+": > > > > lang.most = MAXMATCH; > > > > ofs++; > > > > break; > > > > case "-": > > > > lang.most = MAXMATCH; > > > > lang.least = 0; > > > > lang.greed = false; > > > > ofs++; > > > > break; > > > > case "?": > > > > lang.least = 0; > > > > lang.most = 1; > > > > lang.greed = false; > > > > ofs++; > > > > break; > > > > } > > > > lang.least--; > > > > return ofs; > > > > } > > > > > > > > private bool _MatchAtom(string t) > > > > { > > > > char tchar=t.ToCharArray()[0]; > > > > //Try to match single atom > > > > int posi; > > > > for (posi = 0; posi < LangConfs.Length; posi++) > > > > { > > > > string conf = LangConfs.Substring(posi, 1); > > > > if (conf == "%") > > > > { > > > > //Try to parse special config > > > > posi++; > > > > conf = LangConfs.Substring(posi, 1); > > > > switch (conf) > > > > { > > > > case "a": > > > > if (char.IsLetter(tchar)) > > > > return true; > > > > break; > > > > case "c": > > > > if (char.IsControl(tchar)) > > > > return true; > > > > break; > > > > case "d": > > > > if (char.IsDigit(tchar)) > > > > return true; > > > > break; > > > > case "l": > > > > if (char.IsLower(tchar)) > > > > return true; > > > > break; > > > > case "p": > > > > if (char.IsPunctuation(tchar)) > > > > return true; > > > > break; > > > > case "s": > > > > if (char.IsWhiteSpace(tchar)) > > > > return true; > > > > break; > > > > case "u": > > > > if (char.IsUpper(tchar)) > > > > return true; > > > > break; > > > > case "w": > > > > if (char.IsLetterOrDigit(tchar)) > > > > return true; > > > > break; > > > > case "x": > > > > if (char.IsNumber(tchar) || > > "abcdefABCDEF".Contains(t)) > > > > return true; > > > > break; > > > > case "z": > > > > if (t == "\0") > > > > return true; > > > > break; > > > > default: > > > > if (t == conf) > > > > return true; > > > > break; > > > > } > > > > > > > > } > > > > else > > > > { > > > > if (t == conf) > > > > return true; > > > > if (LangConfs.Length > posi + 2 && > > LangConfs.Substring(posi + 1, 1) == "_") > > > > { > > > > posi += 2; > > > > if (tchar > conf.ToCharArray()[0] && tchar > > <= LangConfs.Substring(posi, 1).ToCharArray()[0]) > > > > return true; > > > > } > > > > } > > > > } > > > > return false; > > > > } > > > > > > > > public bool MatchAtom(string t) > > > > { > > > > return _MatchAtom(t) ^ !positive; > > > > } > > > > /* > > > > * If first called, lastlen=-1 > > > > * If called again, lastlen=last call returns (length) > > > > * If no available match, return -1 > > > > */ > > > > public int MatchLen(string s, int stp, int lastlen) > > > > { > > > > int _last = lastlen; > > > > if (_last == -2) > > > > if (greed) > > > > _last = most + 1; > > > > else > > > > _last = least - 1; > > > > > > > > //Trust lastlen as Okay > > > > > > > > if (greed) > > > > { > > > > if (lastlen != -2) > > > > if (lastlen > least) > > > > return lastlen - 1; > > > > else > > > > return -2; > > > > _last--; > > > > //Try to find a solution from _last to least > > > > int i; > > > > for (i = 0; i < _last && stp + i < s.Length; i++) > > > > if (!MatchAtom(s.Substring(stp + i, 1))) > > > > break; > > > > i--; > > > > if (i < least) > > > > return -2; > > > > else > > > > return i; > > > > } > > > > else > > > > { > > > > _last++; > > > > //Try to find a solution from _last to most > > > > int i; > > > > if (lastlen == -2) > > > > if (least == -1) > > > > return -1; > > > > else > > > > lastlen = least; > > > > if (lastlen < 0) lastlen = 0; > > > > for (i = lastlen; i <= most && stp + i < s.Length; i++) > > > > if (MatchAtom(s.Substring(stp + i, 1))) > > > > { > > > > if (i >= _last) > > > > return i; > > > > } > > > > else > > > > return -2; > > > > return -2; > > > > > > > > } > > > > } > > > > } > > > > > > > > Listatoms; > > > > > > > > bool TouchHead; > > > > bool TouchTail; > > > > > > > > List> > > > > > > > public MatchHelper(string pattern) > > > > { > > > > atoms = new List > groups; (); > > > > groups = new List> > > > > > > > //Parse LangAtoms > > > > Pair >(); tmp = new Pair (); > > > > int posi = 0; > > > > > > > > LangAtom tatom; > > > > > > > > if (pattern.StartsWith("^")) > > > > { > > > > TouchHead = true; > > > > posi = 1; > > > > } > > > > else > > > > TouchHead = false; > > > > TouchTail = false; > > > > while (posi < pattern.Length) > > > > { > > > > if (pattern.Substring(posi, 1) == "(") > > > > { > > > > tmp = new Pair(); > > > > tmp.first = atoms.Count; > > > > posi++; > > > > continue; > > > > } > > > > if (pattern.Substring(posi, 1) == ")") > > > > { > > > > tmp.second = atoms.Count; > > > > groups.Add(tmp); > > > > posi++; > > > > continue; > > > > } > > > > if (pattern.Length == posi + 1 && > > pattern.Substring(posi, 1) == "$") > > > > { > > > > TouchTail = true; > > > > posi++; > > > > continue; > > > > } > > > > > > > > posi += LangAtom.construct(pattern, posi, out tatom); > > > > atoms.Add(tatom); > > > > } > > > > } > > > > > > > > System.Collections.Generic.Dictionary> > tmpMatchable; > > > > List , bool> posikey; > > > > > > > > private bool DoSearch(string matchstr, int level) > > > > { > > > > if (atoms.Count == level) > > > > if (!TouchTail || posikey[posikey.Count - 1] == matchstr.Length) > > > > return true; > > > > else > > > > return false; > > > > int _prev = -2; > > > > int last = posikey[posikey.Count - 1]; > > > > > > > > if (tmpMatchable.ContainsKey(new Pair(last, level))) > > > > return false; > > > > > > > > for (; ; ) > > > > { > > > > _prev = atoms[level].MatchLen(matchstr, last, _prev); > > > > if (_prev == -2) break; > > > > > > > > posikey.Add(_prev + last + 1); > > > > if (DoSearch(matchstr, level + 1)) > > > > return true; > > > > posikey.RemoveAt(posikey.Count - 1); > > > > } > > > > > > > > tmpMatchable.Add(new Pair(last, level), false); > > > > return false; > > > > } > > > > > > > > private List_DoMatch(string matchstr, int startp) > > > > { > > > > tmpMatchable = new Dictionary> > > > posikey = new List , bool>(); (); > > > > if (TouchHead && startp != 0) > > > > return null; > > > > int i; > > > > bool succ = false; > > > > if (TouchHead) > > > > { > > > > posikey.Add(startp); > > > > if (!DoSearch(matchstr, 0)) > > > > return null; > > > > succ = true; > > > > } > > > > else > > > > for (i = startp; i < matchstr.Length; i++) > > > > { > > > > posikey.Add(i); > > > > if (DoSearch(matchstr, 0)) > > > > { > > > > succ = true; > > > > break; > > > > } > > > > posikey.RemoveAt(posikey.Count - 1); > > > > } > > > > if (!succ) > > > > return null; > > > > > > > > //Do capture parsing > > > > Listtmp = new List (); > > > > > > > > //Add entire string > > > > tmp.Add(matchstr.Substring(posikey[0], > > posikey[posikey.Count - 1] - posikey[0])); > > > > > > > > //Add block > > > > foreach (Pairip in groups) > > > > if (ip.first == ip.second) > > > > tmp.Add(posikey[ip.first].ToString()); > > > > else > > > > tmp.Add(matchstr.Substring(posikey[ip.first], > > posikey[ip.second] - posikey[ip.first])); > > > > > > > > return tmp; > > > > } > > > > > > > > public ListDoMatch(string matchstr, int startp, ref int lastp) > > > > { > > > > Listtmp; > > > > lock (this) > > > > { > > > > tmp = _DoMatch(matchstr, startp); > > > > if (tmp != null) lastp = posikey[posikey.Count - 1]; > > > > tmpMatchable.Clear(); > > > > tmpMatchable = null; > > > > posikey.Clear(); > > > > posikey = null; > > > > } > > > > return tmp; > > > > } > > > > } > > > > > > > > public struct Pair > > > > { > > > > public I first; > > > > public J second; > > > > public Pair(I first, J second) > > > > { > > > > this.first = first; > > > > this.second = second; > > > > } > > > > } > > > > 在 06-11-29,tocer<tocer.deng在gmail.com> 写道: > > > 扯远一点:你说的是从理论到实现,不是从理论到实践。差别很大的:) > > > > > > Yunfeng Tao wrote:: > > > > 呵呵,不可耻不可耻。据我所知,没有这样一个天才。 > > > > > > > > 正则表达式、有限自动机等等都是理论计算机科学家,其实都是数学家,在回答"什么是计算"这个问题的时候顺带着搞出来的。那时候还没有计算机呢。几十年后有个牛人发现这个玩意工程上也很有用,于是用理论家们的成果实现了一把。这段历史就是这样,从理论到实践,而不是照着马克思的意思从实践到理论。 > > > > > > > > 在 06-11-29,刘鑫<march.liu在gmail.com> 写道: > > > >> 我试过,可耻的失败了……笑…… > > > >> 从实践中总结出真理的是少数的天才,我不是,我只有在前人的智慧中成长的份儿。 > > > >> > > > >> 2006/11/29, Yunfeng Tao <taoyfeng在gmail.com>: > > > >>> 你要是认为我说的不对,自己写一个看看。所谓"不能说的这么绝对"云云,不足以反驳我。 > > > >>> > > > >>> 2006/11/29, rosetta <rosettas在gmail.com>: > > > >>>> On 11/29/06, Yunfeng Tao <taoyfeng在gmail.com > wrote: > > > >>>>> 我再举个例子。 > > > >>>>> > > > >>>>> > > > >> 正则表达式大家都知道吧?怎么写一个正则表达式做search的库?我敢打保票,没学过计算理论的有限自动机部分,写对都是不太可能的,更不要说效率了。 > > > >>>> 不能说的这么绝对,理论也是在实践中发展出来的. > > > >>>> 能熟练并且高效使用正则表达式工作的人是可能写出高效的符合要求的库的,即使没学过计算理论. > > > >>>> 只是对于本身就对正则不够精通的情况下,学过理论是有帮助的. > > > >>>> > > > >>>>> 2006/11/29, shhgs <shhgs.efhilt在gmail.com>: > > > >>>>>> 看来大家对数学的看法不同。 > > > >>>>>> > > > >>>>>> > > > >> 我认为很多东西都属于数学的范畴。数学不仅仅是微积分,行列式。很多算法的设计思路,问题的分析思路都是数学。不要觉得一个问题用到的数学知识很少就认为它不是数学。数学首先是一种思路。 > > > >>>>>> 下面我举两个例子。 > > > >>>>>> > > > >>>>>> 第一是GC。堆里有很多对象,栈里有很多引用。请问你怎样判断堆里的对象是不是垃圾?(copy and > > > >> delete不算) > > > >>>>>> > > > >> 第二,内存检测程序。不要以为这个很简单。因为你程序运行的时候是要占内存的,但是你又怎样判断程序占用的这段内存是不是有问题。这就像是站在桌子上用棍子探哪里凹下去一样。问题是你怎么知道你站的地方是不是凹下去了。 > > > >>>>>> 这里没有微积分,没有行列式,甚至没有加减乘除,但是这难道不是数学吗? > > > >>>>>> > > > >>>>>> On 11/28/06, fdu.xiaojf在gmail.com < fdu.xiaojf在gmail.com> wrote: > > > >>>>>>> shhgs wrote: > > > >>>>>>>> 就差半步。 > > > >>>>>>>> > > > >>>>>>>> 其实 > > > >>>>>>>> > > > >>>>>>>> f(n) = 1/2 * f(n-1) + 1/2 f(n+1) > > > >>>>>>>> > > > >>>>>>>> 请问,这个f(n)是什么?是不是一个等差数列? > > > >>>>>>>> > > > >>>>>>>> 下面还要再说吗? > > > >>>>>>>> > > > >>>>>>> 唉,我白痴了,后面白费那么多力气,怎么一眼没看出来呢,呵呵 > > > >>>>>>> > > > >>>>>>> 谢谢! > > > >>>>>>> _______________________________________________ > > > >>>>>>> python-chinese > > > >>>>>>> Post: send python-chinese在lists.python.cn > > > >>>>>>> Subscribe: send subscribe to > > > >> python-chinese-request在lists.python.cn > > > >>>>>>> Unsubscribe: send unsubscribe to > > > >> python-chinese-request在lists.python.cn > > > >>>>>>> Detail Info: > > > >> http://python.cn/mailman/listinfo/python-chinese > > > >>>>>> _______________________________________________ > > > >>>>>> python-chinese > > > >>>>>> Post: send python-chinese在lists.python.cn > > > >>>>>> Subscribe: send subscribe to > > > >> python-chinese-request在lists.python.cn > > > >>>>>> Unsubscribe: send unsubscribe to > > > >> python-chinese-request在lists.python.cn > > > >>>>>> Detail Info: > > > >> http://python.cn/mailman/listinfo/python-chinese > > > >>>>> _______________________________________________ > > > >>>>> python-chinese > > > >>>>> Post: send python-chinese在lists.python.cn > > > >>>>> Subscribe: send subscribe to > > > >> python-chinese-request在lists.python.cn > > > >>>>> Unsubscribe: send unsubscribe to > > > >> python-chinese-request在lists.python.cn > > > >>>>> Detail Info: > > > >> http://python.cn/mailman/listinfo/python-chinese > > > >>>> > > > >>>> -- > > > >>>> with kind regards > > > >>>> _______________________________________________ > > > >>>> python-chinese > > > >>>> Post: send python-chinese在lists.python.cn > > > >>>> Subscribe: send subscribe to > > > >> python-chinese-request在lists.python.cn > > > >>>> Unsubscribe: send unsubscribe to > > > >> python-chinese-request在lists.python.cn > > > >>>> Detail Info: > > > >> http://python.cn/mailman/listinfo/python-chinese > > > >>> _______________________________________________ > > > >>> python-chinese > > > >>> Post: send python-chinese在lists.python.cn > > > >>> Subscribe: send subscribe to > > > >> python-chinese-request在lists.python.cn > > > >>> Unsubscribe: send unsubscribe to > > > >> python-chinese-request在lists.python.cn > > > >>> Detail Info: > > > >> http://python.cn/mailman/listinfo/python-chinese > > > >> > > > >> > > > >> > > > >> -- > > > >> 欢迎访问: > > > >> http://blog.csdn.net/ccat > > > >> > > > >> 刘鑫 > > > >> March.Liu > > > >> > > > >> _______________________________________________ > > > >> python-chinese > > > >> Post: send python-chinese在lists.python.cn > > > >> Subscribe: send subscribe to > > > >> python-chinese-request在lists.python.cn > > > >> Unsubscribe: send unsubscribe to > > > >> python-chinese-request在lists.python.cn > > > >> Detail Info: > > > >> http://python.cn/mailman/listinfo/python-chinese > > > >> > > > > _______________________________________________ > > > > python-chinese > > > > Post: send python-chinese在lists.python.cn > > > > Subscribe: send subscribe to python-chinese-request在lists.python.cn > > > > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > > > > Detail Info: http://python.cn/mailman/listinfo/python-chinese > > > > > > -- > > > Vim 中文 Google 论坛 http://groups.google.com/group/Vim-cn > > > _______________________________________________ > > > python-chinese > > > Post: send python-chinese在lists.python.cn > > > Subscribe: send subscribe to python-chinese-request在lists.python.cn > > > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > > > Detail Info: http://python.cn/mailman/listinfo/python-chinese > > _______________________________________________ > > python-chinese > > Post: send python-chinese在lists.python.cn > > Subscribe: send subscribe to python-chinese-request在lists.python.cn > > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > > Detail Info: http://python.cn/mailman/listinfo/python-chinese > _______________________________________________ > python-chinese > Post: send python-chinese在lists.python.cn > Subscribe: send subscribe to python-chinese-request在lists.python.cn > Unsubscribe: send unsubscribe to python-chinese-request在lists.python.cn > Detail Info: http://python.cn/mailman/listinfo/python-chinese
Zeuux © 2025
京ICP备05028076号