On Fri, 2006-08-18 at 11:27, Dinh-Tuan Pham wrote:
> [...] Plus généralement
> la distinction entre iso et utf reste un mystère pour moi. Par exemple,
> comment passer d'un environnement iso en utf8 ? Comment résoudre les problèmes
> de noms de fichiers accentués, voire les contenus de fichiers textes ayant
> des caractères accentués, ...
Pour les noms de fichiers, il faut faire passer une moulinette qui va
tout convertir d'iso-latin-1 vers utf-8.
Je t'en joins une.
Xav
#!/usr/bin/env python
import sys;
import os;
import codecs;
# Codec functions called by rename_if_needed.  They are placeholders here and
# only receive real codecs.getdecoder/getencoder results once the script body
# at the end of the file has read the source charset from the command line.
utf8_decoder = decoder = encoder = None
def _display_name(path):
    """Return ``path`` in a form that can be concatenated into a message.

    On Python 2 file names are ``str`` and are returned untouched, so the
    printed messages are unchanged.  On Python 3 a ``bytes`` path is decoded
    leniently, because it cannot be concatenated with a text literal.
    """
    if isinstance(path, bytes) and not isinstance(path, str):
        return path.decode("utf-8", "replace")
    return path


def rename_if_needed(dirname, f):
    """Rename entry ``f`` of ``dirname`` to UTF-8 when it is not valid UTF-8.

    Relies on the module-level ``utf8_decoder``, ``decoder`` and ``encoder``
    codec functions, which must have been assigned before the first call.

    Args:
        dirname: directory containing the entry (byte-string path).
        f: name of the entry inside ``dirname`` (byte string).

    Behaviour:
        * a name that already decodes as UTF-8 is left alone;
        * a name that does not decode with ``decoder`` is reported and
          skipped;
        * a name whose UTF-8 spelling already exists in ``dirname`` is
          reported and skipped instead of being overwritten;
        * otherwise the entry is renamed and the rename is reported.

    Raises:
        SystemExit: with status 1 if the decoded name cannot be re-encoded.
    """
    try:
        utf8_decoder(f, "strict")
    except UnicodeError:
        pass  # not valid UTF-8: this name is a conversion candidate
    else:
        return

    old_path = os.path.join(dirname, f)

    try:
        decoded = decoder(f, "strict")[0]
    except UnicodeError:
        print("File " + _display_name(old_path) + " not in the encoding you specified; skipping")
        return

    try:
        new_name = encoder(decoded, "strict")[0]
    except UnicodeError:
        print("You are totally fucked with file " + _display_name(old_path))
        sys.exit(1)

    if new_name == f:
        return

    new_path = os.path.join(dirname, new_name)

    # os.rename silently replaces an existing destination on POSIX systems;
    # refuse to destroy an entry that already carries the converted name.
    if os.path.lexists(new_path):
        print("Not renaming " + _display_name(old_path) + ": "
              + _display_name(new_path) + " already exists; skipping")
        return

    print("Renaming " + _display_name(old_path) + " to " + _display_name(new_path))
    os.rename(old_path, new_path)
def fix_dir(dirname):
    """Convert every name found below ``dirname``, depth first.

    The entries of ``dirname`` are first split into real sub-directories
    (directories that are not symbolic links) and everything else.  The
    "everything else" entries are handed to ``rename_if_needed`` first.
    Each sub-directory is then processed recursively and renamed only
    afterwards, so the path used for the recursion is still the old one.
    """
    subdirs = []
    others = []
    for entry in os.listdir(dirname):
        path = os.path.join(dirname, entry)
        # A symlink pointing at a directory is treated like a plain file:
        # it is never followed, only (possibly) renamed.
        if os.path.isdir(path) and not os.path.islink(path):
            subdirs.append(entry)
        else:
            others.append(entry)

    for entry in others:
        rename_if_needed(dirname, entry)

    for entry in subdirs:
        fix_dir(os.path.join(dirname, entry))
        rename_if_needed(dirname, entry)
# Script body: read the source charset from the command line, set up the
# codec functions used by rename_if_needed, then convert the names found
# below the current working directory.
if len(sys.argv) < 2:
    sys.stderr.write("Converts filenames to UTF-8 encoding, recursively\n"
                     "\n"
                     "Usage: " + sys.argv[0] + " <ORIGINAL_CHARSET>\n"
                     "\n"
                     "Example: " + sys.argv[0] + " ISO8859-1\n")
    sys.exit(1)

old_encoding = sys.argv[1]

# codecs.getdecoder raises LookupError for a charset Python does not know;
# report that as a usage error instead of dying with a traceback.
try:
    decoder = codecs.getdecoder(old_encoding)
except LookupError:
    sys.stderr.write("Unknown charset: " + old_encoding + "\n")
    sys.exit(1)
utf8_decoder = codecs.getdecoder("utf-8")
encoder = codecs.getencoder("utf-8")

# The codec functions work on byte-string names.  os.getcwd() already returns
# a byte string on Python 2; on Python 3 os.getcwdb() is the bytes variant, and
# os.listdir() on a bytes path yields bytes names.
fix_dir(getattr(os, "getcwdb", os.getcwd)())