parser.py

Go to the documentation of this file.
00001 from silme.core.entity import EntityList, Entity
00002 from structure import *
00003 import re
00004 
class DTDParser(object):
    """Regex-based parser for DTD ``<!ENTITY ...>`` declarations and comments.

    All methods are classmethods; the class only holds the compiled
    patterns shared between them.
    """

    # Character classes for entity names. The ranges are assembled as
    # regex character-class fragments (no surrounding brackets).
    name_start_char = (
        u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF'
        u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF'
        u'\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
    )
    # Backslashes are doubled instead of using a ``ur''`` literal, which is
    # a syntax error on Python 3; the resulting string is identical.
    name_char = (name_start_char + u'\\-\\.0-9' +
                 u'\xB7\u0300-\u036F\u203F-\u2040')
    name = u'[' + name_start_char + u'][' + name_char + u']*'

    patterns = {}
    # group(1): entity name, group(2): value including its quotes.
    patterns['entity'] = re.compile(
        u'<!ENTITY\\s+(' + name +
        u')\\s+((?:"[^"]*")|(?:\'[^\']*\'))\\s*>',
        re.S | re.U)
    # group(1): text between the comment delimiters.
    patterns['comment'] = re.compile(
        u'<!\\s*--(.*?)(?:--\\s*>)', re.M | re.S)

    @classmethod
    def parse(cls, text, code='default'):
        """Parse *text* into a new ``DTDStructure`` and return it.

        *code* is passed on to every entity value that is set and is
        stored as the structure's ``fallback``.
        """
        dtd = DTDStructure()
        cls.build_element_list(text, dtd, code=code)
        dtd.fallback = code
        return dtd

    @classmethod
    def parse_to_entitylist(cls, text, code='default'):
        """Return an ``EntityList`` with every entity declared in *text*.

        Comments are removed first, so declarations inside comments are
        not included.
        """
        entitylist = EntityList()
        text = cls.patterns['comment'].sub('', text)
        for name, quoted in cls.patterns['entity'].findall(text):
            # Strip the surrounding quote characters from the value.
            entitylist.add_entity(Entity(name, quoted[1:-1], code))
        return entitylist

    @classmethod
    def parse_entity(cls, text, code='default'):
        """Parse a single entity declaration at the start of *text*.

        Returns the ``Entity`` built from the declaration's name with its
        unquoted value set for *code*.

        Raises ``ValueError`` (an ``Exception`` subclass, so existing
        ``except Exception`` handlers keep working) when *text* does not
        start with an entity declaration.
        """
        match = cls.patterns['entity'].match(text)
        if not match:
            raise ValueError('text does not start with a DTD entity '
                             'declaration')
        # group(1) is the name, group(2) the quoted value -- the same
        # mapping split_entities uses.
        entity = Entity(match.group(1))
        entity.set_value(match.group(2)[1:-1], code)
        return entity

    @classmethod
    def build_element_list(cls, text, object, type='comment', code='default',
                           pointer=0, end=None):
        """Append the elements found in *text* to *object*.

        Delegates to ``split_comments``, forwarding *pointer* and *end* so
        that a sub-range of *text* can be parsed. *type* is accepted for
        interface compatibility and is not used.
        """
        cls.split_comments(text, object, code, pointer, end)

    @classmethod
    def split_comments(cls, text, object, code='default', pointer=0,
                       end=None):
        """Split ``text[pointer:end]`` on comments and append to *object*.

        Text between comments is handed to ``split_entities``. Each
        comment becomes a ``Comment`` whose content is itself run through
        ``split_entities``. *end* of ``None`` means "to the end of text".
        """
        pattern = cls.patterns['comment']
        stop = len(text) if end is None else end
        match = pattern.search(text, pointer, stop)
        while match:
            st0 = match.start(0)
            if st0 > pointer:
                cls.split_entities(text, object, code=code,
                                   pointer=pointer, end=st0)
            comment = Comment()
            cls.split_entities(match.group(1), comment, code=code)
            object.append(comment)
            pointer = match.end(0)
            match = pattern.search(text, pointer, stop)
        # Remaining text after the last comment, still bounded by *end*.
        if stop > pointer and len(text) > pointer:
            cls.split_entities(text, object, code=code,
                               pointer=pointer, end=stop)

    @classmethod
    def split_entities(cls, text, object, code='default', pointer=0,
                       end=None):
        """Split ``text[pointer:end]`` on entities and append to *object*.

        Text between declarations is appended as plain strings. Each
        declaration is appended as an ``Entity`` whose ``params['source']``
        records the original declaration string and the offset of the
        value inside it. *end* of ``None`` means "to the end of text".
        """
        pattern = cls.patterns['entity']
        stop = len(text) if end is None else end
        match = pattern.search(text, pointer, stop)
        while match:
            st0 = match.start(0)
            if st0 > pointer:
                object.append(text[pointer:st0])
            name, quoted = match.groups()
            entity = Entity(name)
            entity.set_value(quoted[1:-1], code)
            entity.params['source'] = {
                'type': 'dtd',
                'string': match.group(0),
                # Offset of the first value character (just past the
                # opening quote) relative to the start of the declaration.
                'valpos': match.start(2) + 1 - st0,
            }
            object.append(entity)
            pointer = match.end(0)
            match = pattern.search(text, pointer, stop)
        # Trailing plain text after the last declaration.
        if stop > pointer and len(text) > pointer:
            object.append(text[pointer:stop])

Generated on Tue May 12 17:37:27 2009 for silme by  doxygen 1.5.8