
Commit b080ca5

merging update, better error handling of empty files
2 parents a294e17 + 5c410d4

4 files changed: +48 -85 lines

crontab.txt

+1

@@ -0,0 +1 @@
+1,30,20,40,50,10 * * * * bash /home/h4ck3rm1k3/experiments/wikiteam/runexport.sh
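
Note: the unsorted minute list above fires runexport.sh six times an hour, at minutes 1, 10, 20, 30, 40 and 50. Assuming an even ten-minute cadence is the intent (the commit does not say), the conventional spelling would be

    */10 * * * * bash /home/h4ck3rm1k3/experiments/wikiteam/runexport.sh

which differs only in firing at minute 0 instead of minute 1.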

dumpgenerator.py

+1 -1

@@ -801,7 +801,7 @@ def push_zip (file):
 
     year = d.year
    month= d.month
-    block= "wikipedia-delete-v2-%0.4d-%02d" % (year, month)
+    block= "wikipedia-delete-v3-%0.4d-%02d" % (year, month)
     print "going to use %s" % block
     conn = boto.connect_s3(host='s3.us.archive.org', is_secure=False)
     bucket = conn.get_bucket(block)
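
The only change in this file bumps the Internet Archive bucket generation from wikipedia-delete-v2 to wikipedia-delete-v3. For context, a minimal sketch of how this monthly bucket name feeds a boto 2.x upload against archive.org's S3-compatible endpoint; the key name and local path are hypothetical, not taken from the commit:

    import datetime
    import boto

    d = datetime.date.today()
    # Same scheme as push_zip: one bucket per calendar month.
    block = "wikipedia-delete-v3-%0.4d-%02d" % (d.year, d.month)

    conn = boto.connect_s3(host='s3.us.archive.org', is_secure=False)
    bucket = conn.get_bucket(block)  # assumes the month's bucket already exists
    key = bucket.new_key('example-dump.xml.7z')                 # hypothetical key
    key.set_contents_from_filename('/tmp/example-dump.xml.7z')  # hypothetical path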

pywikibot/throttle.py

+5 -3

@@ -37,7 +37,7 @@ def __init__(self, mindelay=None, maxdelay=None, writedelay=None,
                  multiplydelay=True, verbosedelay=False, write=False):
         self.lock = threading.RLock()
         self.mysite = None
-        self.ctrlfilename = config.datafilepath('pywikibot', 'throttle.ctrl')
+        self.ctrlfilename = config.datafilepath('pywikibot2', 'throttle2.ctrl')
         self.mindelay = mindelay
         if self.mindelay is None:
             self.mindelay = config.minthrottle
@@ -234,7 +234,8 @@ def __call__(self, requestsize=1, write=False):
         """
         self.lock.acquire()
         try:
-            wait = self.waittime(write=write or self.write)
+            #wait = self.waittime(write=write or self.write)
+            wait = 1
             # Calculate the multiplicity of the next delay based on how
             # big the request is that is being posted now.
             # We want to add "one delay" for each factor of two in the
@@ -271,7 +272,8 @@ def lag(self, lagtime):
         # wait at least 5 seconds but not more than 120 seconds
         delay = min(max(5, lagtime//2), 120)
         # account for any time we waited while acquiring the lock
-        wait = delay - (time.time() - started)
+        # wait = delay - (time.time() - started)
+        wait = 1
         if wait > 0:
             if wait > config.noisysleep:
                 pywikibot.output(
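
Taken together, these changes pin both delay paths to a flat one-second wait, disabling pywikibot's adaptive throttling and server-lag backoff for this bulk import; pointing ctrlfilename at pywikibot2/throttle2.ctrl also keeps the patched throttle from sharing its control file with a stock pywikibot install. For reference, the clamp that lag() normally applies to the reported replication lag (taken from the unchanged line above):

    # Upstream rule: wait at least 5 seconds, at most 120, otherwise lag/2.
    def lag_delay(lagtime):
        return min(max(5, lagtime // 2), 120)

    assert lag_delay(4) == 5       # short lag still waits the 5 s floor
    assert lag_delay(30) == 15     # mid-range lag waits half the lag
    assert lag_delay(1000) == 120  # long lag is capped at 120 s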

speedydeletion.py

+41 -81

@@ -11,36 +11,6 @@
 from shove import Shove
 file_store = Shove('file://wikiaupload')
 
-# def signpage(site,pagename) :
-
-#     generator = [pywikibot.Page(
-#         site,
-#         pagename
-#         )]
-#     # Main Loop
-#     for page in generator:
-#         print "going to process %s" % page.urlname()
-#         try:
-#             text = page.get()
-#         except:
-#             text = ""
-
-#         m = re.search("==archived on speedy deletion wikia==" , text)
-#         if not(m):
-#             m = re.search("==archived==" , text)
-#             if not( m):
-#                 summary="notification of speedy deletion page"
-#                 newname =page.urlname()
-#                 newname = newname.replace('Talk%3A', '')
-#                 newtext= "==archived on speedy deletion wikia==\nThis endangered article has been archived here http://speedydeletion.wikia.com/wiki/%s so that it is not lost if deleted. Changes made after the archiving will not be copied.\n~~~~" % newname
-#                 (text, newtext, always) = add_text(page, newtext, summary, regexSkip,
-#                                  regexSkipUrl, always, up, True, reorderEnabled=reorderEnabled,
-#                                  create=talkPage)
-#             else:
-#                 print "skipping %s" % page.urlname()
-#         else:
-#             print "skipping %s" % page.urlname()
-
 def main(*args):
     genFactory = pagegenerators.GeneratorFactory()
     # If xmlfilename is None, references will be loaded from the live wiki.
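
The file_store constructed above is a Shove object: a dict-like store persisted on disk, which the import loop in the last hunk below uses as a seen-set (a KeyError on lookup means the title has not been handled yet). A minimal sketch of that pattern, assuming only shove's dict-like API; the titles are illustrative:

    from shove import Shove

    file_store = Shove('file://wikiaupload')  # dict-like, file-backed

    def already_done(title):
        try:
            file_store[title]   # hit: recorded on an earlier run
            return True
        except KeyError:
            return False

    def mark_done(title):
        file_store[title] = 1

    if not already_done('Some deleted article'):
        # ... fetch and upload the page here ...
        mark_done('Some deleted article')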
@@ -51,16 +21,11 @@ def main(*args):
     # read command line parameters
     for arg in pywikibot.handleArgs(*args):
         xmlfilename = arg
-
     print xmlfilename
-
     insite = pywikibot.getSite("en","wikipedia")
-
     importsite = "speedydeletion"
-
     outsite = pywikibot.getSite("en",importsite)
     outsite.forceLogin()
-
     dump = xmlreader.XmlDump(xmlfilename)
     count = 0
 
@@ -74,60 +39,55 @@
     for entry in dump.parse():
         # print file_store[entry.title]
         title=entry.title.encode("ascii","ignore")
-
 
-        m = re.search("Wikipedia:" , entry.title)
-        if m:
+        if re.search("^Wikipedia:" , entry.title):
+            pywikibot.output(u'skipping %s' % entry.title)
+            continue
+        if re.search("^User:" , entry.title):
+            pywikibot.output(u'skipping %s' % entry.title)
+            continue
+        if re.search("^User Talk:" , entry.title):
+            pywikibot.output(u'skipping %s' % entry.title)
+            continue
+        if re.search(".css$" , entry.title):
+            pywikibot.output(u'skipping %s' % entry.title)
+            continue
+        if re.search("^Main Page" , entry.title):
             pywikibot.output(u'skipping %s' % entry.title)
-            next;
-        if entry.title != "Main Page" :
+            continue
+        pywikibot.output(u'Considering %s' % entry.title)
+        try :
+            if (file_store[title] ) :
+                count = count +1
+            else:
+                pywikibot.output(u'not exists %s' % entry.title)
+        except KeyError :
             try :
-                if (file_store[title] ) :
-                    count = count +1
-                    # pywikibot.output(u'was cached %s' % entry.title)
+                outpage = pywikibot.Page(site=outsite, title=entry.title, insite=outsite)
+                if outpage.exists():
+                    pywikibot.output(u'there is an article %s' % entry.title)
+                    try:
+                        file_store[title] = 1
+                    except KeyError :
+                        pywikibot.output(u'key error saving article %s' % entry.title)
                 else:
-                    pywikibot.output(u'not exists %s' % entry.title)
-            except KeyError :
-                # print sys.exc_type, ":", "%s is not in the list." % sys.exc_value
-                # pywikibot.output(u'key error %s' % entry.title)
-                try :
-                    outpage = pywikibot.Page(site=outsite, title=entry.title, insite=outsite)
-                    if outpage.exists():
-                        pywikibot.output(u'there is an article %s' % entry.title)
-                        try:
-                            file_store[title] = 1
-                        except KeyError :
-                            pywikibot.output(u'key error saving article %s' % entry.title)
-
+                    pywikibot.output(u'is not there, adding %s' % entry.title)
+                    contents = entry.text
+                    usernames = entry.username
+                    if re.search('Template:', title):
+                        contents = contents + "<noinclude>{{wikipedia-template|%s}}</noinclude>" % usernames
                     else:
-                        pywikibot.output(u'is not there, adding %s' % entry.title)
-                        contents = entry.text
-                        usernames = entry.username
-                        if re.search('Template:', title):
-                            contents = contents + "<noinclude>{{wikipedia-template|%s}}</noinclude>" % usernames
-                        else:
-                            contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
-                        # contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
-
-                        outpage._site=outsite
-                        outpage.put(contents)
-
-                        # signpage(insite,"Talk:%s" % pagename)
-
-                        try :
-                            file_store[title] = 1
-                        except:
-                            pywikibot.output(u'could not save %s! to the list of article' % entry.title)
+                        contents = contents + "\n{{wikipedia-deleted|%s}}" % usernames
+                    outpage._site=outsite
+                    outpage.put(contents)
+                    try :
+                        file_store[title] = 1
                     except:
-                        pywikibot.output(u'could not process %s! ' % entry.title)
-                    finally:
-                        count = count + 1
-                except:
-                    pywikibot.output(u'could not process %s! ' % entry.title)
+                        pywikibot.output(u'could not save %s! to the list of article' % entry.title)
             finally:
                 count = count + 1
-                #print "done with %s %d" % (entry.title, count)
-
+        finally:
+            count = count + 1
 
 if __name__ == "__main__":
     try:
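
The five new skip checks share one shape: match a pattern against the title, log, and continue. Two details worth flagging: the dot in ".css$" is unescaped, so it matches any character before css, and (as reconstructed) both finally: blocks increment count, so every entry ends up counted twice. A table-driven equivalent of the checks, as a sketch rather than part of the commit:

    import re

    SKIP_PATTERNS = [
        r'^Wikipedia:',   # project pages
        r'^User:',        # user pages
        r'^User Talk:',   # user talk pages
        r'\.css$',        # stylesheets; the commit's ".css$" leaves the dot unescaped
        r'^Main Page',    # the main page
    ]

    def should_skip(title):
        return any(re.search(p, title) for p in SKIP_PATTERNS)

    assert should_skip(u'User:Example')
    assert should_skip(u'Monobook.css')
    assert not should_skip(u'Some Article')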
