This repository was archived by the owner on Jan 3, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathrun_seolinter.py
executable file
·58 lines (44 loc) · 1.57 KB
/
run_seolinter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# usage (content to lint is read from stdin):
# > python run_seolinter.py [--format=auto|html|xml|txt] < input
# example:
# > cat robots.txt | run_seolinter.py --format=txt
# > curl http://www.biography.com/sitemaps.xml | run_seolinter.py
import optparse
import sys
import re
import seolinter
# Matches input whose first markup (after optional leading whitespace and an
# optional XML declaration) is an HTML doctype or an opening <html tag.
_HTML_PREFIX = re.compile(
    r'\s*(?:<\?xml[^>]*\?>\s*)?<(?:!doctype\s+html|html)',
    re.IGNORECASE)


def _detect_format(text):
    """Guess whether *text* is 'html', 'xml' or 'txt' from how it starts."""
    if not text.lstrip().startswith('<'):
        # No markup at the start: treated as plain text (robots.txt path).
        return 'txt'
    if _HTML_PREFIX.match(text):
        return 'html'
    # Any other markup is handed to the sitemap linter.
    return 'xml'


def run(options, args):
    """Lint the content read from stdin and exit with a status code.

    Reads all of stdin, runs the seolinter function matching
    ``options.format`` and prints one line per entry of ``seolinter.rules``
    whose key (``rule[0]``) appears in the linter output, followed by a
    ``found:`` line when the reported value is not ``True``.

    Args:
        options: parsed command-line options; ``options.format`` must be one
            of 'auto', 'html', 'xml' or 'txt'. When it is 'auto' it is
            overwritten with the detected format.
        args: positional command-line arguments (unused).

    Exits (via ``sys.exit``) with:
        0 when no reported rule has level ERROR or CRITICAL,
        1 when at least one does,
        2 when ``options.format`` is not a supported value.
    """
    text = sys.stdin.read()

    if options.format == 'auto':
        options.format = _detect_format(text)

    if options.format == 'html':
        output = seolinter.lint_html(text)
    elif options.format == 'xml':
        output = seolinter.lint_sitemap(text)
    elif options.format == 'txt':
        output = seolinter.lint_robots_txt(text)
    else:
        # Previously this fell through and crashed on an unbound `output`.
        sys.stderr.write(
            "unsupported format %r (expected auto, html, xml or txt)\n"
            % (options.format,))
        sys.exit(2)

    exit_code = 0
    # Iterate the rules (not the output) so results are printed in the order
    # seolinter.rules defines.
    for rule in seolinter.rules:
        code, level = rule[0], rule[2]
        if code not in output:
            continue
        value = output[code]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s: %s (%s)" % (code, rule[1], seolinter.levels[level]))
        # Equality (not identity) on purpose: keeps the original comparison.
        if value != True:  # noqa: E712
            print("\tfound: %s" % (value,))
        if level in (seolinter.ERROR, seolinter.CRITICAL):
            exit_code = 1

    sys.exit(exit_code)
if __name__ == "__main__":
    # Command-line entry point: parse the options, then lint whatever
    # arrives on stdin.
    parser = optparse.OptionParser(
        description='Validates html, sitemap xml and robots.txt content for common errors.')
    # Restrict --format to the supported values so a typo is reported as a
    # usage error up front.
    parser.add_option('-f', '--format', type="choice",
                      choices=['auto', 'html', 'xml', 'txt'], default='auto',
                      help='The type of file to parse: auto, html, xml or txt '
                           '(default: %default).')
    (options, args) = parser.parse_args()
    run(options, args)