00001 import codecs
00002 import os
00003 import sys
00004
00005 from silme.core.object import *
00006 import silme.format
00007
# chardet is an optional dependency: char_detector records whether the
# import succeeded so that callers can skip charset detection without it.
try:
    import chardet
except ImportError:
    char_detector = False
else:
    char_detector = True
00013
class IOClient(object):
    """
    Abstract interface of an I/O client.

    Every method below is a classmethod stub that raises
    NotImplementedError; concrete clients override the operations
    they support.
    """

    # Codec name -> byte order mark that introduces a stream in that codec.
    bomdict = {
        'utf_8_sig': codecs.BOM_UTF8,
        'utf_16_be': codecs.BOM_UTF16_BE,
        'utf_16_le': codecs.BOM_UTF16_LE,
        'utf_16': codecs.BOM_UTF16,
    }

    @classmethod
    def matches_path(cls, path):
        """
        tells whether this ioclient should be used for the given path
        (not implemented in the base class)
        """
        raise NotImplementedError()

    @classmethod
    def get_blob(cls, path, source=True):
        """
        blob getter for the path (not implemented in the base class)
        """
        raise NotImplementedError()

    @classmethod
    def get_entitylist(cls, path, source=False, code='default',
                       parser=None):
        """
        entity list getter for the path
        (not implemented in the base class)
        """
        raise NotImplementedError()

    @classmethod
    def get_l10nobject(cls, path, source=False, code='default',
                       parser=None):
        """
        l10n object getter for the path
        (not implemented in the base class)
        """
        raise NotImplementedError()

    @classmethod
    def get_l10npackage(cls, path,
                        code='default',
                        object_type='l10nobject',
                        source=None,
                        ignore=['CVS', '.svn', '.DS_Store', '.hg']):
        """
        l10n package getter for the path
        (not implemented in the base class)
        """
        # The list default is part of the public signature; it is never
        # touched here because the stub raises immediately.
        raise NotImplementedError()

    @classmethod
    def get_source(cls, path, encoding=None, fallback=None):
        """
        reads source from the path
        """
        raise NotImplementedError()

    @classmethod
    def get_source_with_encoding(cls, path, encoding):
        """
        reads source with encoding fallback
        """
        raise NotImplementedError()

    @classmethod
    def get_source_without_encoding(cls, path):
        """
        reads source ignoring encoding (in binary compatible mode)
        """
        raise NotImplementedError()

    @classmethod
    def write_blob(cls, blob, path):
        """
        blob writer (not implemented in the base class)
        """
        raise NotImplementedError()

    @classmethod
    def write_entitylist(cls, elist, path):
        """
        entity list writer (not implemented in the base class)
        """
        raise NotImplementedError()

    @classmethod
    def write_l10nobject(cls, l10nobject, path):
        """
        l10n object writer (not implemented in the base class)
        """
        raise NotImplementedError()

    @classmethod
    def write_object(cls, object, path):
        """
        generic object writer (not implemented in the base class)
        """
        raise NotImplementedError()

    @classmethod
    def write_l10npackage(cls, l10npackage, path):
        """
        l10n package writer (not implemented in the base class)
        """
        raise NotImplementedError()

    @classmethod
    def write_source(cls, source, path, encoding):
        """
        writes source to destination path
        """
        raise NotImplementedError()

    @classmethod
    def path_type(cls, path):
        """
        returns 'package', 'object' depending on the path type
        """
        raise NotImplementedError()
00101
00102
class FileFormatClient(IOClient):
    """
    Base client for sources that are read as files and parsed by a
    silme format parser.

    Implements the storage-independent part of reading: parser lookup
    through silme.format.Manager, encoding selection and BOM handling.
    The raw primitives (_read_with_encoding, _read_without_encoding,
    _write_source_with_encoding) raise NotImplementedError here and are
    left to subclasses.
    """

    @classmethod
    def get_blob(cls, path, uri=None, source=True):
        """
        builds a Blob for the path.

        The blob id is the basename of the path and its uri is `uri`
        or, when that is empty, the path itself. The undecoded content
        is loaded into blob.source only when `source` is true.
        """
        blob = Blob()
        blob.id = os.path.basename(path)
        if source:
            blob.source = cls.get_source_without_encoding(path)
        blob.uri = uri or path
        return blob

    @classmethod
    def get_entitylist(cls, path, uri=None, source=False, code='default',
                       parser=None):
        """
        reads the path and parses it into an entity list.

        When no parser is given one is looked up with
        silme.format.Manager.get(path=path). The parser's `encoding`
        and `fallback` attributes drive get_source. The decoded text is
        attached as entitylist.source only when `source` is true; the
        encoding actually used is always stored in entitylist.encoding.
        """
        if not parser:
            parser = silme.format.Manager.get(path=path)
        src = cls.get_source(path, encoding=parser.encoding,
                             fallback=parser.fallback)
        entitylist = parser.get_entitylist(src[0], code=code)
        entitylist.id = os.path.basename(path)
        entitylist.uri = uri or path
        if source:
            entitylist.source = src[0]
        entitylist.encoding = src[1]
        return entitylist

    @classmethod
    def get_l10nobject(cls, path, uri=None, source=False, code='default',
                       parser=None):
        """
        reads the path and parses it into an l10n object.

        Mirrors get_entitylist, but calls parser.get_l10nobject.
        """
        if not parser:
            parser = silme.format.Manager.get(path=path)
        src = cls.get_source(path, encoding=parser.encoding,
                             fallback=parser.fallback)
        l10nobject = parser.get_l10nobject(src[0], code=code)
        l10nobject.id = os.path.basename(path)
        l10nobject.uri = uri or path
        if source:
            l10nobject.source = src[0]
        l10nobject.encoding = src[1]
        return l10nobject

    @classmethod
    def get_l10npackage(cls, path,
                        code='default',
                        object_type='l10nobject',
                        source=None,
                        ignore=['CVS', '.svn', '.DS_Store', '.hg']):
        """
        builds an empty L10nPackage whose id is the basename of the
        path and whose uri is the path.

        `code`, `object_type`, `source` and `ignore` are accepted for
        signature compatibility but are not used by this implementation.
        """
        # The list default is kept because it is part of the public
        # signature; it is never mutated here.
        l10npackage = L10nPackage()
        l10npackage.id = os.path.basename(path)
        l10npackage.uri = path
        return l10npackage

    @classmethod
    def get_source(cls, path, encoding=None, fallback=None):
        """
        reads source with all autoguessing, encoding guessing.
        This method is offered for reading sources that are semantically
        meaningful for later use.

        Returns a tuple (text, encoding). `encoding` is None when every
        decoding attempt failed and the undecoded content is returned.

        With an explicit `encoding` the source is read with it, and an
        Exception is raised when the encoding reported back differs from
        the requested one and is not listed in `fallback`.

        Without an explicit `encoding` the following are tried in order:
        each codec in `fallback` (default ['utf_8']), a codec whose BOM
        from cls.bomdict starts the raw content, the codec guessed by
        chardet (when installed), sys.getdefaultencoding(), and finally
        the undecoded content.
        """
        if encoding is not None:
            output = cls.get_source_with_encoding(path, encoding)
            found = output[1]
            if found != encoding and (fallback is None or
                                      found not in fallback):
                raise Exception('The encoding for path ' + path + ' is not ' +
                                encoding + ' like expected, but ' + found +
                                '. ' + found + ' is not in the fallback list' +
                                ', break.')
            return output

        # Stage 1: the caller's fallback codecs. Only decoding failures
        # move on to the next candidate; IOError and friends propagate.
        for coding in (fallback or ['utf_8']):
            try:
                return cls.get_source_with_encoding(path, coding)
            except UnicodeDecodeError:
                continue

        # Stage 2: look for a known byte order mark in the raw content.
        text = cls._read_without_encoding(path)
        for coding, bom in cls.bomdict.items():
            if text.startswith(bom):
                return cls.get_source_with_encoding(path, coding)

        # Stage 3: statistical detection. The original called a
        # non-existent cls.read_without_encoding() and swallowed the
        # resulting AttributeError, so this stage never ran; the raw
        # content read above is reused instead.
        if char_detector:
            try:
                detected = chardet.detect(text)['encoding']
            except Exception:
                # Detection is best effort: any failure inside chardet
                # just moves on to the next stage.
                detected = None
            if detected:
                try:
                    return cls.get_source_with_encoding(path,
                                                        detected.lower())
                except (UnicodeDecodeError, LookupError):
                    # Wrong guess, or a codec name Python does not know.
                    pass

        # Stage 4: interpreter default encoding, then undecoded content.
        try:
            return cls.get_source_with_encoding(path,
                                                sys.getdefaultencoding())
        except Exception:
            return (cls.get_source_without_encoding(path), None)

    @classmethod
    def get_source_with_encoding(cls, path, encoding):
        """
        reads the path with the given encoding and returns a tuple
        (text, encoding).

        The returned encoding may differ from the requested one because
        _test_bom adjusts it to the presence of a BOM. An IOError from
        the read is re-raised as IOError with the path prepended to the
        message; every other exception propagates unchanged.
        """
        try:
            text = cls._read_with_encoding(path, encoding)
        except IOError as e:
            raise IOError(path + ': ' + str(e))
        text, encoding = cls._test_bom(text, encoding)
        if isinstance(text, str):
            # The reader handed back a plain str: decode it explicitly.
            return cls._to_unicode(text, encoding)
        return (text, encoding)

    @classmethod
    def get_source_without_encoding(cls, path):
        """
        reads the path without decoding it.

        An IOError is re-raised as IOError with the path prepended to
        the message; every other exception propagates unchanged.
        """
        try:
            return cls._read_without_encoding(path)
        except IOError as e:
            raise IOError(path + ': ' + str(e))

    @classmethod
    def _test_bom(cls, text, encoding):
        """
        strips a leading U+FEFF from text and reconciles the encoding
        name with it; returns a tuple (text, encoding).

        - 'utf_8' with a BOM becomes 'utf_8_sig' (BOM stripped)
        - any codec listed in cls.bomdict with a BOM keeps its name
          (BOM stripped)
        - 'utf_8_sig' without a BOM becomes 'utf_8'
        Any other encoding is returned untouched, without inspecting
        the text.
        """
        bom = u'\ufeff'
        if encoding != 'utf_8' and encoding not in cls.bomdict:
            return (text, encoding)
        if text.startswith(bom):
            text = text[len(bom):]
            if encoding == 'utf_8':
                encoding = 'utf_8_sig'
        elif encoding == 'utf_8_sig':
            encoding = 'utf_8'
        return (text, encoding)

    @classmethod
    def _to_unicode(cls, text, encoding):
        """
        decodes text with the codec named by encoding and returns a
        tuple (decoded_text, encoding). Decoding errors propagate.
        """
        decode_to_unicode = codecs.getdecoder(encoding)
        return (decode_to_unicode(text)[0], encoding)

    @staticmethod
    def _get_source_policy(source):
        """
        maps the tri-state `source` argument to a pair of flags
        (b_source, oe_source):

        - None  -> (True, False)
        - False -> (False, False)
        - anything else -> (True, True)

        NOTE(review): the names suggest "load source for blobs" and
        "load source for objects/entity lists" - confirm with callers.
        """
        if source is None:
            return (True, False)
        if source is False:
            return (False, False)
        return (True, True)

    @staticmethod
    def _should_ignore(ignore, path, elems):
        """
        tells whether something should be skipped.

        When `ignore` is callable its result is returned; otherwise
        `ignore` is treated as a container and the result is True when
        any item of `elems` is in it.
        """
        if callable(ignore):
            # NOTE(review): the original passed an undefined name
            # (`query`) here, which raised NameError for every callable.
            # `path` is assumed to be the intended argument - confirm
            # against the callers that supply an ignore function.
            return ignore(path)
        return any(i in ignore for i in elems)

    @classmethod
    def _write_source_with_encoding(cls, content, path, encoding=None):
        """
        low-level writer; must be provided by subclasses
        """
        raise NotImplementedError()

    @classmethod
    def _read_with_encoding(cls, path, encoding):
        """
        low-level decoding reader; must be provided by subclasses
        """
        raise NotImplementedError()

    @classmethod
    def _read_without_encoding(cls, path):
        """
        low-level raw reader; must be provided by subclasses
        """
        raise NotImplementedError()
00301
class DBClient(IOClient):
    """
    Base for database-backed clients.

    The blob, l10n object and raw source operations inherited from
    IOClient are replaced by None, so they are not callable on this
    client; only entity lists are handled.
    """

    get_blob = None
    get_l10nobject = None
    write_blob = None
    write_l10nobject = None
    write_source = None

    @classmethod
    def get_entitylist(cls, path):
        """
        entity list getter; must be provided by subclasses
        """
        # Declared as a classmethod like every other client operation:
        # without the decorator DBClient.get_entitylist(path) failed
        # with a TypeError instead of NotImplementedError.
        raise NotImplementedError()

    @classmethod
    def write_object(cls, object, path, encoding=None):
        """
        writes an object to the path.

        Only EntityList instances are accepted; they are forwarded to
        cls.write_entitylist together with `encoding`. Anything else
        raises TypeError.
        """
        if not isinstance(object, EntityList):
            raise TypeError('DBClient can only write EntityList objects, '
                            'got ' + type(object).__name__)
        # NOTE(review): IOClient.write_entitylist takes no `encoding`
        # argument, so this relies on the subclass override accepting it.
        cls.write_entitylist(object, path, encoding=encoding)
00318
class RCSClient(FileFormatClient):
    """
    FileFormatClient subclass that adds no behavior of its own.

    NOTE(review): the name suggests a common base for revision control
    system clients - confirm against the subclasses.
    """