clients.py

Go to the documentation of this file.
00001 import codecs
00002 import os
00003 import sys
00004 
00005 from silme.core.object import *
00006 import silme.format
00007 
00008 try:
00009     import chardet
00010     char_detector=True
00011 except ImportError:
00012     char_detector=False
00013 
class IOClient(object):
    """
    abstract interface shared by all I/O clients

    Every method here raises NotImplementedError; concrete clients are
    expected to override the ones they support.
    """

    # maps codec names to the byte order mark (raw bytes) of that codec
    bomdict = {
        'utf_8_sig': codecs.BOM_UTF8,
        'utf_16_be': codecs.BOM_UTF16_BE,
        'utf_16_le': codecs.BOM_UTF16_LE,
        'utf_16': codecs.BOM_UTF16,
    }

    @classmethod
    def matches_path(cls, path):
        """
        tells whether this ioclient should be used for this type of path
        """
        raise NotImplementedError()

    @classmethod
    def get_blob(cls, path, source=True):
        """
        abstract: load a blob from the path
        """
        raise NotImplementedError()

    @classmethod
    def get_entitylist(cls, path, source=False, code='default', parser=None):
        """
        abstract: load an entity list from the path
        """
        raise NotImplementedError()

    @classmethod
    def get_l10nobject(cls, path, source=False, code='default', parser=None):
        """
        abstract: load an l10nobject from the path
        """
        raise NotImplementedError()

    @classmethod
    def get_l10npackage(cls, path,
                        code='default',
                        object_type='l10nobject',
                        source=None,
                        ignore=['CVS','.svn','.DS_Store', '.hg']):
        """
        abstract: load an l10npackage from the path
        """
        raise NotImplementedError()

    @classmethod
    def get_source(cls, path, encoding=None, fallback=None):
        """
        abstract: read the source found at the path
        """
        raise NotImplementedError()

    @classmethod
    def get_source_with_encoding(cls, path, encoding):
        """
        abstract: read the source with encoding fallback
        """
        raise NotImplementedError()

    @classmethod
    def get_source_without_encoding(cls, path):
        """
        abstract: read the source ignoring encoding
        (in binary compatible mode)
        """
        raise NotImplementedError()

    @classmethod
    def write_blob(cls, blob, path):
        """
        abstract: write a blob to the path
        """
        raise NotImplementedError()

    @classmethod
    def write_entitylist(cls, elist, path):
        """
        abstract: write an entity list to the path
        """
        raise NotImplementedError()

    @classmethod
    def write_l10nobject(cls, l10nobject, path):
        """
        abstract: write an l10nobject to the path
        """
        raise NotImplementedError()

    @classmethod
    def write_object(cls, object, path):
        """
        abstract: write an object to the path
        """
        raise NotImplementedError()

    @classmethod
    def write_l10npackage(cls, l10npackage, path):
        """
        abstract: write an l10npackage to the path
        """
        raise NotImplementedError()

    @classmethod
    def write_source(cls, source, path, encoding):
        """
        abstract: write the source to the destination path
        """
        raise NotImplementedError()

    @classmethod
    def path_type(cls, path):
        """
        abstract: return 'package' or 'object' depending on the path type
        """
        raise NotImplementedError()
00101 
00102 
class FileFormatClient(IOClient):
    """
    base client for sources that are read as files in some format

    Implements the loading side of IOClient on top of three primitives
    which are left abstract here and must be provided by subclasses:
    _read_with_encoding, _read_without_encoding and
    _write_source_with_encoding.
    """

    # U+FEFF: the byte order mark as it shows up in already decoded text
    _BOM_TEXT = u'\ufeff'

    @classmethod
    def get_blob(cls, path, uri=None, source=True):
        """
        builds a Blob for the given path

        The blob id is the base name of the path and its uri is the uri
        argument, or the path when no uri is given. When source is true
        the content read without any decoding is attached as blob.source.
        """
        blob = Blob()
        blob.id = os.path.basename(path)
        if source:
            blob.source = cls.get_source_without_encoding(path)
        blob.uri = uri or path
        return blob

    @classmethod
    def get_entitylist(cls, path, uri=None, source=False, code='default', parser=None):
        """
        parses the file at path into an entity list

        When no parser is given, one is looked up from the path through
        silme.format.Manager. The decoded text is attached as .source only
        when source is true; the encoding that was actually used is always
        stored in .encoding.
        """
        if not parser:
            parser = silme.format.Manager.get(path=path)
        text, encoding = cls.get_source(path, encoding=parser.encoding,
                                        fallback=parser.fallback)
        entitylist = parser.get_entitylist(text, code=code)
        entitylist.id = os.path.basename(path)
        entitylist.uri = uri or path
        if source:
            entitylist.source = text
        entitylist.encoding = encoding
        return entitylist

    @classmethod
    def get_l10nobject(cls, path, uri=None, source=False, code='default', parser=None):
        """
        parses the file at path into an l10nobject

        When no parser is given, one is looked up from the path through
        silme.format.Manager. The decoded text is attached as .source only
        when source is true; the encoding that was actually used is always
        stored in .encoding.
        """
        if not parser:
            parser = silme.format.Manager.get(path=path)
        text, encoding = cls.get_source(path, encoding=parser.encoding,
                                        fallback=parser.fallback)
        l10nobject = parser.get_l10nobject(text, code=code)
        l10nobject.id = os.path.basename(path)
        l10nobject.uri = uri or path
        if source:
            l10nobject.source = text
        l10nobject.encoding = encoding
        return l10nobject

    @classmethod
    def get_l10npackage(cls, path,
                        code='default',
                        object_type='l10nobject',
                        source=None,
                        ignore=['CVS','.svn','.DS_Store', '.hg']):
        """
        creates an empty L10nPackage describing the path

        Only id (base name of the path) and uri (the path) are set; code,
        object_type, source and ignore are not used at this level. The
        default ignore list is shared between calls and must not be
        modified.
        """
        l10npackage = L10nPackage()
        l10npackage.id = os.path.basename(path)
        l10npackage.uri = path
        return l10npackage

    @classmethod
    def get_source(cls, path, encoding=None, fallback=None):
        """
        reads source with all autoguessing, encoding guessing.
        This method is offered for reading sources that are semantically
        meaningful for later use.

        Returns a (text, encoding) tuple. The encoding is None when no
        codec could decode the file and the undecoded content is returned
        instead.

        If encoding is given it is forced: there is no fallback when
        opening with it fails, and an Exception is raised when the
        encoding reported after the BOM check differs from the requested
        one and is not listed in fallback.

        If encoding is not given the following is tried, in this order:
        every entry of fallback (default: utf_8), the codec matching a
        BOM found in the raw content, the chardet guess (only if chardet
        is installed), and finally the system default encoding.
        """
        if encoding is not None:
            output = cls.get_source_with_encoding(path, encoding)
            found = output[1]
            if found != encoding and (fallback is None or
                                      found not in fallback):
                raise Exception('The encoding for path %s is not %s like '
                                'expected, but %s. %s is not in the '
                                'fallback list, break.'
                                % (path, encoding, found, found))
            return output

        # no encoding specified: try the fallback list first
        if not fallback:
            fallback = ['utf_8']
        for coding in fallback:
            try:
                return cls.get_source_with_encoding(path, coding)
            except UnicodeDecodeError:
                continue # TODO: logging

        # still no success: check whether the raw content starts with a BOM.
        # Decoding errors are not swallowed here, a BOM is taken as reliable.
        raw = cls._read_without_encoding(path)
        for coding, bom in cls.bomdict.items():
            if raw.startswith(bom):
                return cls.get_source_with_encoding(path, coding)

        # UniversalDetector: only used if available
        # When used, a slow down of 10 to 20 times can be expected!
        # Be warned: sometimes it detects the encoding wrongly!
        if char_detector:
            try:
                # the content read for the BOM check is reused, no second read
                guess = chardet.detect(raw)['encoding']
            except Exception:
                guess = None # detection is best effort only; TODO: logging
            if guess:
                try:
                    return cls.get_source_with_encoding(path, guess.lower())
                except (UnicodeDecodeError, LookupError):
                    # wrong guess, or a codec name unknown to Python
                    pass # TODO: logging

        # last chance: try to open using the system default encoding
        try:
            return cls.get_source_with_encoding(path, sys.getdefaultencoding())
        except Exception:
            return (cls.get_source_without_encoding(path), None)

    @classmethod
    def get_source_with_encoding(cls, path, encoding):
        """
        reads the path with the given encoding

        Returns a (text, encoding) tuple. The returned encoding may differ
        from the requested one after the BOM check (utf_8 <-> utf_8_sig).
        An IOError is re-raised with the path prepended to its message;
        every other exception (UnicodeDecodeError included) propagates
        untouched.
        """
        try:
            text = cls._read_with_encoding(path, encoding)
        except IOError as e:
            raise IOError(path + ': ' + str(e))
        text, encoding = cls._test_bom(text, encoding)
        # a reader that handed back a byte string still has to be decoded
        # (on Python 2.6+ bytes is the same type as str)
        if isinstance(text, bytes):
            return cls._to_unicode(text, encoding)
        return (text, encoding)

    @classmethod
    def get_source_without_encoding(cls, path):
        """
        reads the path without any decoding

        An IOError is re-raised with the path prepended to its message.
        """
        try:
            return cls._read_without_encoding(path)
        except IOError as e:
            raise IOError(path + ': ' + str(e))

    @classmethod
    def _test_bom(cls, text, encoding):
        """
        strips a leading text BOM and adjusts the encoding name

        - utf_8 text starting with a BOM is reported as utf_8_sig
        - for the encodings listed in bomdict a leading BOM is removed
        - utf_8_sig text without a BOM is reported as plain utf_8

        Returns a (text, encoding) tuple. The text is only inspected for
        utf_8 and for the encodings listed in bomdict.
        """
        bom = cls._BOM_TEXT
        if encoding == 'utf_8':
            if text.startswith(bom):
                return (text[len(bom):], 'utf_8_sig')
        elif encoding in cls.bomdict:
            if text.startswith(bom):
                text = text[len(bom):]
            elif encoding == 'utf_8_sig':
                encoding = 'utf_8'
        return (text, encoding)

    @classmethod
    def _to_unicode(cls, text, encoding):
        """
        decodes text with the given encoding, returns (text, encoding)
        """
        # we want to work only on unicode strings!
        decode_to_unicode = codecs.getdecoder(encoding)
        return (decode_to_unicode(text)[0], encoding)

    @staticmethod
    def _get_source_policy(source):
        """
        returns a (b_source, oe_source) tuple that defines whether the
        source of a file should be attached to a blob (b_source) and to
        an l10nobject or entitylist (oe_source)

        if source is None - only the blob gets source
        if source is False - none of them gets source
        anything else - l10nobject, entitylist and blob get source
        """
        if source is None:
            return (True, False)
        if source is False: # don't load it for anyone
            return (False, False)
        return (True, True) # load it for everyone

    @staticmethod
    def _should_ignore(ignore, path, elems):
        """
        allows objects and packages to be ignored inside get_l10npackage.

        ignore argument of IOClient.get_l10npackage can be:
        callable - it is called with the path and its result is returned
        container - the result is True if any item of elems is in it
        """
        if callable(ignore):
            return ignore(path)
        return any(i in ignore for i in elems)

    @classmethod
    def _write_source_with_encoding(cls, content, path, encoding=None):
        """
        abstract: to be implemented by subclasses
        """
        raise NotImplementedError()

    @classmethod
    def _read_with_encoding(cls, path, encoding):
        """
        abstract: to be implemented by subclasses
        """
        raise NotImplementedError()

    @classmethod
    def _read_without_encoding(cls, path):
        """
        abstract: to be implemented by subclasses
        """
        raise NotImplementedError()
00301 
class DBClient (IOClient):
    """
    base client for database backed storage

    Only entity lists are handled at this level.
    """

    # The inherited blob, l10nobject and source operations are replaced by
    # None, so calling them fails instead of reaching the IOClient stubs
    # (presumably to mark them as unsupported for databases - TODO confirm).
    get_blob = None
    get_l10nobject = None
    write_blob = None
    write_l10nobject = None
    write_source = None

    @classmethod
    def get_entitylist (cls, path):
        """
        abstract: load an entity list from the path
        """
        raise NotImplementedError()

    @classmethod
    def write_object(cls, object, path, encoding=None):
        """
        writes the object to the path, only EntityList is accepted

        Raises TypeError for any other kind of object.
        """
        if isinstance(object, EntityList):
            # NOTE(review): IOClient.write_entitylist as declared takes no
            # encoding argument; a subclass override is expected to accept
            # it - TODO confirm
            cls.write_entitylist(object, path, encoding=encoding)
        else:
            raise TypeError()
00318 
class RCSClient (FileFormatClient):
    """
    placeholder subclass, adds nothing to FileFormatClient
    (presumably the base for revision control clients - TODO confirm)
    """

Generated on Tue May 12 17:37:27 2009 for silme by  doxygen 1.5.8