00001 from silme.core.entity import EntityList, Entity
00002 from structure import *
00003 import re
00004
class GettextParser:
    """Parser turning gettext (.po) text into silme structures.

    Every method is a classmethod; the class acts as a namespace for the
    compiled patterns and for the splitting pipeline
    ``build_element_list`` -> ``split_msgctxt`` -> ``split_comments``
    -> ``split_entities``.

    Throughout the pipeline ``pointer`` and ``end`` delimit the half-open
    window ``text[pointer:end]`` to process; ``end=None`` means "up to the
    end of ``text``".
    """

    # Compiled once at class-creation time and shared by all calls.
    patterns = {
        # group(1): msgid text; group(2): the raw msgstr value, i.e. one or
        # more double-quoted chunks, optionally continued on following lines.
        'entity': re.compile(
            '^msgid "([^"]*)"\nmsgstr ((?:"[^"]*"\n?)*(?:"[^"]*"))$',
            re.M | re.S),
        # A whole line starting with '#'; group(1) is everything after it.
        'comment': re.compile('^#([^\n]*)$', re.M),
        # A whole 'msgctxt ...' line including its trailing newline.
        'msgctxt': re.compile('^msgctxt [^\n]*\n', re.M | re.S),
    }

    @classmethod
    def parse(cls, text, code='default'):
        """Parse ``text`` into a new ``GettextStructure``.

        The structure is filled by ``build_element_list`` and its
        ``fallback`` attribute is set to ``code`` before it is returned.
        """
        po = GettextStructure()
        cls.build_element_list(text, po, code=code)
        po.fallback = code
        return po

    @classmethod
    def parse_to_entitylist(cls, text, code='default'):
        """Collect the msgid/msgstr pairs of ``text`` into an ``EntityList``.

        Comment lines are blanked out first; pairs with an empty msgid are
        skipped. Each remaining pair is added as
        ``Entity(msgid, raw_msgstr, code)``.
        """
        entity_list = EntityList()
        text = cls.patterns['comment'].sub('', text)
        for msgid, raw_value in cls.patterns['entity'].findall(text):
            if msgid:
                # NOTE(review): the value is stored exactly as matched
                # (surrounding quotes and line continuations included),
                # whereas split_entities runs it through _clean_value.
                # Left as-is because a serializer may rely on it -- confirm.
                entity_list.add_entity(Entity(msgid, raw_value, code))
        return entity_list

    @classmethod
    def build_element_list(cls, text, object, type='comment', code='default',
                           pointer=0, end=None):
        """Fill ``object`` with the elements parsed from ``text``.

        ``pointer`` and ``end`` are forwarded to ``split_msgctxt`` so that a
        caller can restrict parsing to a window of ``text`` (they used to be
        accepted but silently ignored). ``type`` is not used; it is kept
        only so existing callers keep working.
        """
        cls.split_msgctxt(text, object, code=code, pointer=pointer, end=end)

    @classmethod
    def split_msgctxt(cls, text, object, code='default', pointer=0, end=None):
        """Drop msgctxt lines from the window, then hand over to
        ``split_comments``.

        msgctxt lines are removed rather than parsed (there is no support
        for them yet). Only lines inside ``text[pointer:end]`` are removed,
        so ``pointer`` keeps pointing at the same character, and ``end`` is
        shifted left by the number of removed characters so it still marks
        the same position in the stripped text.
        """
        stop = len(text) if end is None else end
        kept = []
        cursor = 0
        removed = 0
        for match in cls.patterns['msgctxt'].finditer(text, pointer, stop):
            kept.append(text[cursor:match.start(0)])
            cursor = match.end(0)
            removed += match.end(0) - match.start(0)
        kept.append(text[cursor:])
        if end is not None:
            end = stop - removed
        cls.split_comments(''.join(kept), object, code=code,
                           pointer=pointer, end=end)

    @classmethod
    def split_comments(cls, text, object, code='default', pointer=0, end=None):
        """Split ``text[pointer:end]`` into comments and the text between.

        For each comment line, the text preceding it is passed to
        ``split_entities`` (appending to ``object``), then a ``Comment`` is
        created, filled by running ``split_entities`` on the comment body,
        and appended to ``object``. Text after the last comment is passed to
        ``split_entities`` as well, bounded by ``end``.
        """
        stop = len(text) if end is None else end
        for match in cls.patterns['comment'].finditer(text, pointer, stop):
            start = match.start(0)
            if start > pointer:
                cls.split_entities(text, object, code=code,
                                   pointer=pointer, end=start)
            comment = Comment()
            cls.split_entities(match.group(1), comment, code=code)
            object.append(comment)
            pointer = match.end(0)
        # The remainder must respect the window too; it used to run to the
        # end of ``text`` even when ``end`` was given.
        if stop > pointer and len(text) > pointer:
            cls.split_entities(text, object, code=code,
                               pointer=pointer, end=stop)

    @classmethod
    def split_entities(cls, text, object, code='default', pointer=0, end=None):
        """Split ``text[pointer:end]`` into entities and plain strings.

        Every msgid/msgstr pair becomes an ``Entity`` whose value has been
        passed through ``_clean_value`` and whose ``params['source']``
        records the matched string and the offset of the value inside it.
        Text between, before and after the pairs is appended to ``object``
        as plain strings. Returns ``object``.
        """
        # ``end is None`` (not falsiness) decides whether a bound was given,
        # so ``end=0`` denotes an empty window rather than "no bound".
        stop = len(text) if end is None else end
        # NOTE(review): ``code`` is accepted but never passed to Entity
        # here, unlike in parse_to_entitylist -- confirm this is intended.
        for match in cls.patterns['entity'].finditer(text, pointer, stop):
            start = match.start(0)
            if start > pointer:
                object.append(text[pointer:start])
            entity = Entity(match.group(1), cls._clean_value(match.group(2)))
            entity.params['source'] = {
                'type': 'gettext',
                'string': match.group(0),
                # Offset of the raw msgstr value within 'string'.
                'valpos': match.start(2) - start,
            }
            object.append(entity)
            pointer = match.end(0)
        if stop > pointer and len(text) > pointer:
            object.append(text[pointer:stop])
        return object

    @classmethod
    def _clean_value(cls, text):
        """Return the msgstr content of a raw quoted value.

        Adjacent quoted chunks split across lines (``"a"\\n"b"``) are joined
        and the outermost pair of quotes is removed.
        """
        return text.replace('"\n"', '')[1:-1]