parser.py

Go to the documentation of this file.
00001 from silme.core.entity import EntityList, Entity
00002 from structure import *
00003 import re
00004 
class GettextParser():
    '''
    Regex-based parser for gettext (PO) text.

    Every method is a classmethod/staticmethod; the class only groups the
    compiled patterns and the individual parsing steps.  parse() fills a
    GettextStructure, parse_to_entitylist() fills a flat EntityList.
    '''
    patterns = {}
    # a msgid/msgstr pair: group 1 is the msgid text without its quotes,
    # group 2 is the raw msgstr value (one or more quoted strings which may
    # be separated by newlines, quotes still included)
    patterns['entity'] = re.compile('^msgid "([^"]*)"\nmsgstr ((?:"[^"]*"\n?)*(?:"[^"]*"))$',re.M|re.S)
    # a line starting with '#'; group 1 is everything after the '#'
    patterns['comment'] = re.compile('^#([^\n]*)$',re.M)
    # a whole 'msgctxt ...' line, including its trailing newline
    patterns['msgctxt'] = re.compile('^msgctxt [^\n]*\n',re.M|re.S)

    @classmethod
    def parse(cls, text, code='default'):
        '''
        parses text into a new GettextStructure, sets its fallback
        attribute to code and returns it
        '''
        po = GettextStructure()
        cls.build_element_list(text, po, code=code)
        po.fallback = code
        return po

    @classmethod
    def parse_to_entitylist(cls, text, code='default'):
        '''
        returns an EntityList with one Entity(msgid, value, code) for each
        msgid/msgstr pair found in text

        comment lines are removed before matching and pairs with an empty
        msgid are skipped. The value is passed through _clean_value(), the
        same way split_entities() does it, so that it does not carry the
        surrounding quotes and the '"\\n"' line joins of the raw msgstr.
        '''
        entity_list = EntityList()
        text = cls.patterns['comment'].sub('', text)
        for msgid, raw_value in cls.patterns['entity'].findall(text):
            if msgid:
                entity_list.add_entity(Entity(msgid, cls._clean_value(raw_value), code))
        return entity_list

    @classmethod
    def build_element_list (cls, text, object, type='comment', code='default', pointer=0, end=None):
        '''
        fills object with the elements parsed from text

        type, pointer and end are accepted but not used: parsing always
        starts from split_msgctxt() over the whole text.
        '''
        cls.split_msgctxt(text, object, code)

    @classmethod
    def split_msgctxt(cls, text, object, code='default', pointer=0, end=None):
        '''
        this method removes all msgctxt for now (we don't know how to parse them anyway)

        the remaining text is handed to split_comments(); pointer and end
        are forwarded unchanged, so they index the text after the msgctxt
        lines have been removed.
        '''
        text = cls.patterns['msgctxt'].sub('', text)
        cls.split_comments(text, object, code=code, pointer=pointer, end=end)

    @classmethod
    def split_comments (cls, text, object, code='default', pointer=0, end=None):
        '''
        splits text[pointer:end] on comment lines

        The text between comments is passed to split_entities(). For every
        comment line a Comment is created, filled by running
        split_entities() on the text after the '#', and appended to object.
        end=None means "up to the end of text".
        '''
        pattern = cls.patterns['comment']
        match = cls._search(pattern, text, pointer, end)
        while match:
            st0 = match.start(0)
            if st0 > pointer:
                # whatever sits between the previous comment and this one
                cls.split_entities(text, object, code=code, pointer=pointer, end=st0)
            comment = Comment()
            cls.split_entities(match.group(1), comment, code=code)
            object.append(comment)
            pointer = match.end(0)
            match = cls._search(pattern, text, pointer, end)
        # the rest after the last comment; it has to stop at end as well,
        # otherwise a bounded call would leak the text that follows the range
        if cls._limit(text, end) > pointer:
            cls.split_entities(text, object, code=code, pointer=pointer, end=end)

    @classmethod
    def split_entities (cls, text, object, code='default', pointer=0, end=None):
        '''
        splits text[pointer:end] into entities and the raw text around them

        Appends to object, in order of appearance, plain strings (the text
        that is not part of any msgid/msgstr pair) and Entity objects.
        Each Entity gets params['source'] with the matched source string
        and 'valpos', the offset of the raw value inside that string.
        end=None means "up to the end of text". code is accepted but not
        used here.

        returns object
        '''
        pattern = cls.patterns['entity']
        match = cls._search(pattern, text, pointer, end)
        while match:
            if match.start(0) > pointer:
                object.append(text[pointer:match.start(0)])
            entity = Entity(match.group(1), cls._clean_value(match.group(2)))
            entity.params['source'] = {'type': 'gettext',
                                        'string': match.group(0),
                                        'valpos': match.start(2) - match.start(0)}
            object.append(entity)
            pointer = match.end(0)
            match = cls._search(pattern, text, pointer, end)
        limit = cls._limit(text, end)
        if limit > pointer:
            object.append(text[pointer:limit])
        return object

    @staticmethod
    def _search(pattern, text, pointer, end):
        '''
        pattern.search() over text[pointer:end], where end may be None

        (the compiled pattern's search() does not take None as endpos)
        '''
        if end is None:
            return pattern.search(text, pointer)
        return pattern.search(text, pointer, end)

    @staticmethod
    def _limit(text, end):
        '''
        returns the index at which scanning of text stops for a given end
        '''
        if end is None:
            return len(text)
        return min(end, len(text))

    @classmethod
    def _clean_value(cls, text):
        '''
        turns a raw msgstr value into its content: joins the quoted lines
        (drops every '"\\n"' sequence) and strips the first and the last
        character, which are the outer quotes
        '''
        return text.replace('"\n"', '')[1:-1]

Generated on Tue May 12 17:37:27 2009 for silme by  doxygen 1.5.8