Subversion Repositories pyptex

Rev

Rev 21 | Blame | Compare with Previous | Last modification | View Log | RSS feed

# -*- coding: utf-8 -*-

# PyPTeX, bibtex replacement with better templating and unicode support
# Copyright (C) 2008  Jan Janech <jan.janech@kst.uniza.sk>
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Conversion table between accented Unicode characters and TeX codes.
# Every entry is a pair (character, code):
#   * a code that does not start with a space is "<accent><letter>", i.e.
#     the character is written in TeX as \<accent>{<letter>};
#   * a code that starts with a space is not an accent macro: the text after
#     the space is the literal replacement for the character.
#
# NOTE: u"ő"/u"ö" and u"ű"/u"ü" (and their capitals) share one code, so the
# double acute is written with the umlaut accent.  In the code -> character
# lookup built below the later entry wins, so those codes decode to the
# umlaut forms; keep the umlaut entries after the double-acute ones.
coding = [
    (u"á", "'a"),
    (u"ä", "\"a"),
    (u"ã", "~a"),
    (u"č", "vc"),
    (u"ď", "vd"),
    (u"é", "'e"),
    (u"ě", "ve"),
    (u"í", "'i"),
    (u"ľ", "vl"),
    (u"ĺ", "'l"),
    (u"ň", "vn"),
    (u"ó", "'o"),
    (u"ô", "^o"),
    (u"ő", "\"o"),
    (u"ö", "\"o"),
    (u"ř", "vr"),
    (u"š", "vs"),
    (u"ś", "'s"),
    (u"ť", "vt"),
    (u"ú", "'u"),
    (u"ů", "ru"),
    (u"ű", "\"u"),
    (u"ü", "\"u"),
    (u"ý", "'y"),
    (u"ž", "vz"),
    (u"Á", "'A"),
    (u"Ã", "~A"),
    (u"Č", "vC"),
    (u"Ď", "vD"),
    (u"É", "'E"),
    (u"Ě", "vE"),
    (u"Í", "'I"),
    (u"Ľ", "vL"),
    (u"Ĺ", "'L"),
    (u"Ň", "vN"),
    (u"Ó", "'O"),
    (u"Ő", "\"O"),
    (u"Ö", "\"O"),
    (u"Ř", "vR"),
    (u"Š", "vS"),
    (u"Ś", "'S"),
    (u"Ť", "vT"),
    (u"Ú", "'U"),
    (u"Ů", "rU"),
    (u"Ű", "\"U"),
    (u"Ü", "\"U"),
    (u"Ý", "'Y"),
    (u"Ž", "vZ"),
    (u"–", " --"),
    # The backslash is spelled explicitly: "\s" is not a valid escape
    # sequence and newer Python versions warn about it.
    (u"ß", " {\\ss}"),
    (u"è", "`e"),
]

# Characters that can follow a backslash to start an accent macro.  The test
# is on the first character of the code: literal replacements (codes that
# start with a space) are not accents and must not add ' ' to this set.
__accents = set(code[0] for char, code in coding if code[0] != ' ')
# character -> code; literal replacements are included, the encoders
# recognise them by their leading space.
__codedict = dict(coding)
# code -> character; for duplicated codes the last table entry wins.
__decodedict = dict((code, char) for char, code in coding)

del coding

def texdecode(text):
    r"""Replace TeX accent macros in *text* with the characters they denote.

    A macro is a backslash, one character from ``__accents``, any run of
    ``{`` and whitespace, and then a letter.  It is replaced when
    ``accent + letter`` is a key of ``__decodedict``, so ``\'a``, ``\v c``
    and ``\v{c}`` are all decoded.  When the letter was wrapped in braces,
    the closing braces that directly follow it are consumed too, so the
    output stays balanced and text produced as ``\<accent>{<letter>}``
    decodes back to the single character.

    Everything else is copied through unchanged.  This includes unknown
    macros, a backslash or accent at the very end of the input, and control
    words such as ``\vspace`` whose name merely starts with an accent letter.

    Parameters:
        text: the string to decode.

    Returns:
        The decoded text as a unicode string.
    """
    out = []
    length = len(text)
    pos = 0
    while pos < length:
        char = text[pos]
        decoded = None
        depth = 0
        arg = pos + 2
        if char == '\\' and pos + 1 < length and text[pos + 1] in __accents:
            accent = text[pos + 1]
            # TeX reads a backslash followed by letters as one control word,
            # so a letter accent directly followed by a letter (\vspace,
            # \rule, ...) is some other macro and not an accent.
            is_control_word = (accent.isalpha() and arg < length
                               and text[arg].isalpha())
            if not is_control_word:
                # Skip the braces/whitespace separating accent and letter,
                # counting the braces so they can be closed afterwards.
                while arg < length and (text[arg] == '{'
                                        or text[arg].isspace()):
                    if text[arg] == '{':
                        depth += 1
                    arg += 1
                if arg < length and text[arg].isalpha():
                    decoded = __decodedict.get(accent + text[arg])
        if decoded is None:
            # Not a decodable macro: emit this character as-is and rescan
            # from the next one, so nothing is dropped or swallowed.
            out.append(char)
            pos += 1
            continue
        out.append(decoded)
        pos = arg + 1
        # Consume the closing braces that match the ones opened before the
        # letter; any other following text is left alone.
        while depth and pos < length and text[pos] == '}':
            depth -= 1
            pos += 1
    return u''.join(out)

def texencode(text):
    r"""Rewrite the characters of *text* that have an entry in ``__codedict``.

    *text* is first converted with ``unicode()``.  For a character found in
    ``__codedict`` the stored code decides the output: a code starting with
    a space contributes the rest of the code verbatim, any other code is
    emitted as ``\<accent>{<letter>}`` built from its first two characters.
    Characters without an entry are passed through ``str()``.

    Returns the concatenation of all pieces.
    """
    pieces = []
    for char in unicode(text):
        code = __codedict.get(char)
        if code is None:
            # No table entry: keep the character itself.
            pieces.append(str(char))
        elif code[0] == ' ':
            # Literal replacement; the leading space is only a marker.
            pieces.append(code[1:])
        else:
            pieces.append('\\%s{%s}' % (code[0], code[1]))
    return ''.join(pieces)

def encodetoascii(text):
    """Replace the characters of *text* listed in ``__codedict`` by plain text.

    *text* is first converted with ``unicode()``.  For a character found in
    ``__codedict``, a code starting with a space contributes the rest of
    the code, any other code contributes only its second character (the
    base letter, with the accent dropped).  Characters without an entry are
    passed through ``str()``.

    Returns the concatenation of all pieces.
    """
    def plain(char):
        # Map one character to its replacement text.
        if char not in __codedict:
            return str(char)
        code = __codedict[char]
        if code[0] == ' ':
            return code[1:]
        return code[1]

    return ''.join([plain(char) for char in unicode(text)])