Subversion Repositories pyptex

Rev

Rev 21 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
# -*- coding: utf-8 -*-

# PyPTeX, bibtex replacement with better templating and unicode support
# Copyright (C) 2008  Jan Janech <jan.janech@kst.uniza.sk>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Translation table between accented characters and their TeX spelling.
#
# Each entry is (character, code).  For an accented letter the code is the
# one-character accent macro name followed by the base letter, so "vc"
# stands for \v{c}.  A code that starts with a space is not an accent: the
# remainder of the string is the literal TeX replacement for the character.
coding = [
    (u"á", "'a"),
    (u"ä", "\"a"),
    (u"ã", "~a"),
    (u"č", "vc"),
    (u"ď", "vd"),
    (u"é", "'e"),
    (u"ě", "ve"),
    (u"í", "'i"),
    (u"ľ", "vl"),
    (u"ĺ", "'l"),
    (u"ň", "vn"),
    (u"ó", "'o"),
    (u"ô", "^o"),
    # Double acute is \H in TeX; using the umlaut code here would make
    # this entry indistinguishable from the "ö" entry below.
    (u"ő", "Ho"),
    (u"ö", "\"o"),
    (u"ř", "vr"),
    (u"š", "vs"),
    (u"ś", "'s"),
    (u"ť", "vt"),
    (u"ú", "'u"),
    (u"ů", "ru"),
    (u"ű", "Hu"),
    (u"ü", "\"u"),
    (u"ý", "'y"),
    (u"ž", "vz"),
    (u"Á", "'A"),
    (u"Ã", "~A"),
    (u"Č", "vC"),
    (u"Ď", "vD"),
    (u"É", "'E"),
    (u"Ě", "vE"),
    (u"Í", "'I"),
    (u"Ľ", "vL"),
    (u"Ĺ", "'L"),
    (u"Ň", "vN"),
    (u"Ó", "'O"),
    (u"Ő", "HO"),
    (u"Ö", "\"O"),
    (u"Ř", "vR"),
    (u"Š", "vS"),
    (u"Ś", "'S"),
    (u"Ť", "vT"),
    (u"Ú", "'U"),
    (u"Ů", "rU"),
    (u"Ű", "HU"),
    (u"Ü", "\"U"),
    (u"Ý", "'Y"),
    (u"Ž", "vZ"),
    (u"–", " --"),
    # The backslash is escaped explicitly; "\s" is not a valid escape.
    (u"ß", " {\\ss}"),
    (u"è", "`e"),
]

# Accent macro names that may follow a backslash.  The leading space of the
# literal replacements is a marker, not an accent, so it is left out.
__accents = set(code[0] for _char, code in coding if code[0] != ' ')
# character -> code; keeps the literal replacements, which the encoders
# recognise by their leading space.
__codedict = dict(coding)
# code -> character, looked up by the decoder as accent + letter.
__decodedict = dict((code, char) for char, code in coding)

del coding
78
 
79
def texdecode(text):
    r"""Replace TeX accent macros in *text* with the accented characters.

    A macro is recognised as a backslash, one accent character from
    ``__accents``, an optional run of whitespace and opening braces, and
    one letter -- for example ``\'a``, ``\v c`` or ``\v{c}``.  When the
    accent/letter pair is a key of ``__decodedict`` the whole macro is
    replaced by the mapped character; closing braces that match the
    skipped opening braces are consumed with it, so ``\'{a}`` decodes to
    a single character.

    Everything that is not a known macro is copied to the output
    unchanged: unknown accent/letter pairs, a backslash followed by
    anything else, and macros cut short by a non-letter or by the end of
    the text.  An alphabetic accent followed directly by a letter (as in
    ``\rule``) is a longer control word in TeX, not an accent, and is
    therefore left alone as well.

    :param text: string to decode.
    :returns: the decoded text as a unicode string.
    """
    parts = []
    length = len(text)
    pos = 0
    while pos < length:
        char = text[pos]
        if char != '\\':
            parts.append(char)
            pos += 1
            continue

        # Slicing yields '' instead of raising when the backslash is the
        # last character of the text.
        accent = text[pos + 1:pos + 2]
        if not accent or accent not in __accents:
            # Not an accent macro: keep the backslash and let the next
            # character be handled on its own (it may start a macro).
            parts.append(char)
            pos += 1
            continue

        # Skip the separators allowed between the accent and its letter.
        arg = pos + 2
        while arg < length and (text[arg] == '{' or text[arg].isspace()):
            arg += 1
        direct = arg == pos + 2
        letter = text[arg:arg + 1]

        if not letter.isalpha() or (direct and accent.isalpha()):
            # Either no letter follows, or "\v" + letter is really the
            # start of another control word.  Emit what was scanned and
            # resume at the offending character so that nothing is lost.
            parts.append(text[pos:arg])
            pos = arg
            continue

        key = accent + letter
        if key not in __decodedict:
            # Unknown combination: keep the macro exactly as written.
            parts.append(text[pos:arg + 1])
            pos = arg + 1
            continue

        parts.append(__decodedict[key])
        opened = text[pos + 2:arg].count('{')
        pos = arg + 1
        # Drop the braces that closed the macro argument.
        while opened and pos < length and text[pos] == '}':
            pos += 1
            opened -= 1
    return u''.join(parts)
134
 
135
def texencode(text):
    r"""Return *text* with the characters known to ``__codedict`` in TeX form.

    An accented letter whose code is ``<accent><letter>`` becomes
    ``\<accent>{<letter>}``.  A code that starts with a space is a literal
    replacement and is inserted without that leading space.  Characters
    that have no entry in ``__codedict`` are copied unchanged.

    :param text: value to encode; it is converted to text first.
    :returns: the encoded string.
    """
    try:
        chars = unicode(text)  # Python 2
    except NameError:
        chars = str(text)  # Python 3 has no separate unicode type

    pieces = []
    for char in chars:
        code = __codedict.get(char)
        if code is None:
            try:
                pieces.append(str(char))
            except UnicodeEncodeError:
                # Python 2 only: str() cannot represent a non-ASCII
                # character, so pass it through instead of failing.
                pieces.append(char)
        elif code[0] == ' ':
            pieces.append(code[1:])
        else:
            pieces.append('\\%s{%s}' % (code[0], code[1]))
    return ''.join(pieces)
5 janik 147
 
148
def encodetoascii(text):
    """Return *text* with the characters known to ``__codedict`` unaccented.

    An accented letter is replaced by the base letter stored as the
    second character of its code.  A code that starts with a space is a
    literal replacement and is inserted without that leading space.
    Characters that have no entry in ``__codedict`` are copied unchanged,
    so the result is pure ASCII only if every non-ASCII character of the
    input is present in the table.

    :param text: value to convert; it is converted to text first.
    :returns: the converted string.
    """
    # NOTE(review): the literal replacements are TeX source (for example
    # the sharp s entry), which may not be what callers expect from an
    # "ASCII" form -- confirm against the callers before changing it.
    try:
        chars = unicode(text)  # Python 2
    except NameError:
        chars = str(text)  # Python 3 has no separate unicode type

    pieces = []
    for char in chars:
        code = __codedict.get(char)
        if code is None:
            try:
                pieces.append(str(char))
            except UnicodeEncodeError:
                # Python 2 only: str() cannot represent a non-ASCII
                # character, so pass it through instead of failing.
                pieces.append(char)
        elif code[0] == ' ':
            pieces.append(code[1:])
        else:
            pieces.append(code[1])
    return ''.join(pieces)