summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alberto Bertogli <albertito@blitiri.com.ar> 2012-11-27 00:29:08 +0100
committer Alberto Bertogli <albertito@blitiri.com.ar> 2012-11-27 03:57:31 +0100
commit 9ec2bde5c45c64f7fac432dbd3f23a1883d2b594 (patch)
tree 8ec4be0291c15b6205c6e0e6c7af2831659a24fb
parent 36db9cc0ee151a0249d0ce9089fec9134645e26f (diff)
download git-arr-fork-9ec2bde5c45c64f7fac432dbd3f23a1883d2b594.zip
Only guess the lexer if the file starts with "#!"
The lexer guesser based on content is often wrong; to minimize the chances of that happening, we only use it on files that start with "#!", for which it usually has smarter rules.

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>
-rw-r--r-- utils.py 13
1 files changed, 9 insertions, 4 deletions
diff --git a/utils.py b/utils.py
index 039d02b..801580e 100644
--- a/utils.py
+++ b/utils.py
@@ -52,10 +52,15 @@ def colorize_blob(fname, s):
try:
lexer = lexers.guess_lexer_for_filename(fname, s, encoding = 'utf-8')
except lexers.ClassNotFound:
- try:
- lexer = lexers.guess_lexer(s[:200], encoding = 'utf-8')
- except lexers.ClassNotFound:
- lexer = lexers.TextLexer(encoding = 'utf-8')
+ # Only try to guess lexers if the file starts with a shebang,
+ # otherwise it's likely a text file and guess_lexer() is prone to
+ # make mistakes with those.
+ lexer = lexers.TextLexer(encoding = 'utf-8')
+ if s.startswith('#!'):
+ try:
+ lexer = lexers.guess_lexer(s[:80], encoding = 'utf-8')
+ except lexers.ClassNotFound:
+ pass
formatter = HtmlFormatter(encoding = 'utf-8',
cssclass = 'source_code',