This old version of Proteopedia is provided for student assignments while the new version is undergoing repairs. Content and edits done in this old version of Proteopedia after March 1, 2026 will eventually be lost when it is retired in about June of 2026.
Apply for new accounts at the new Proteopedia. Your logins will work in both the old and new versions.
User:PMBot/Code
From Proteopedia
Following is the preliminary source code. It reads topic page names from Proteopedia:Topic Pages and outputs a list of what would be written on the first pass. Only topic pages are read; no structures are changed.
# -*- coding: utf-8 -*-
from __future__ import with_statement # This isn't required in Python 2.6
"""
pmbot [OPTIONS]
Goes through all topic pages looking for the usage of non-uploaded
structure pages (official PDBs) in scenes. Each of these PDB pages
is edited such that it contains in the section named "About this
Structure" the string
<!-- PMBot Start -->
"The page TOPICPAGE refers to 1ABC."
or "The pages TOPICPAGES refer to 1ABC."
<!-- PMBot End -->
where TOPICPAGE is a topic page link, TOPICPAGES is a comma-separated
list of topic page links, and 1ABC is the name of the respective structure
page. If such a string exists, it is actualized.
Options:
At the moment, there are no options.
"""
#
# (C) R Stephan 2009
#
# Distributed under the terms of the GPL2.
#
__version__ = '0.10'
#
import wikipedia,re,sys,config
import catlib,traceback,itertools
# Minimum delay (seconds) between successive page reads, to stay polite
# to the Proteopedia server.
wikipedia.get_throttle.setDelay(5)
#wikipedia.put_throttle.setDelay(10)
# Edit summary used when the bot saves pages, keyed by language code.
msg={
'en': 'pmbot: maintenance of structure references',
}
def main():
Rco = re.compile (u'<!--(?:.(?<!--))*-->')
Rt1 = re.compile (u'(?<=\[\[)[^\]]+(?=\]\])')
Rt2 = re.compile (u' *\|.*')
Rst = re.compile (u'(?<=STRUCTURE_)[1-9][0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]')
Rap = re.compile (u'(?<=<applet load=.)[1-9][0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z](?=[\'\"])')
Rta = re.compile (u"Start of topic pages.*End of topic pages. Please DON'T REMOVE -->", re.DOTALL)
site = wikipedia.getSite()
dic = {}
# Try to read a topic pages list.
pagename = 'Proteopedia:Topic_Pages'
alltopics = wikipedia.Page (site, pagename)
try:
temp_text = alltopics.get (False, True)
except wikipedia.NoPage:
print 'NoPage exception when trying to read topic page list'
return
# with codecs.open('Topic_Pages.txt', encoding='utf-8') as f: temp_text = f.read()
m = Rta.search(temp_text)
if m == None:
print 'Topic list markers not found.'
return
alltopics_text = Rco.sub (u'', m.group(0))
topicsIter = Rt1.finditer (alltopics_text)
c = 0
for topicmatch in topicsIter:
# if c>2: break
c = c+1
t = topicmatch.group(0)
topicname = Rt2.sub (u'', t)
# TODO: check if already loaded before
loaded = False;
while not loaded:
sys.stdout.flush()
print 'Retrieving ' + topicname.encode ('ascii', 'xmlcharrefreplace')
sys.stdout.flush()
topic = wikipedia.Page (site, topicname)
try:
loaded = True
topic_text = topic.get()
except wikipedia.NoPage:
print 'NoPage exception when trying to read ' + topicname.encode ('ascii', 'xmlcharrefreplace')
loaded = False
break
except wikipedia.SectionError:
print 'Subject does not exist: ' + topicname.encode ('ascii', 'xmlcharrefreplace')
topicname = re.sub (ur"#.*", '', topicname)
loaded = False
continue
except wikipedia.IsRedirectPage, inst:
topicname = inst.args[0]
print 'Redirected to ' + topicname.encode ('ascii', 'xmlcharrefreplace')
loaded = False
continue
# print topic_text.encode('utf-8')
if not loaded: continue
links = itertools.chain (
Rt1.finditer (topic_text),
Rst.finditer (topic_text),
Rap.finditer (topic_text))
for linkmatch in links:
l = linkmatch.group(0)
linkname = Rt2.sub ('', l)
if linkname[0]>'0' and linkname[0]<='9':
if linkname in dic:
s = dic[string.lower(linkname)]
else:
s = set()
s.add (topicname.encode('ascii', 'xmlcharrefreplace'))
dic[string.lower(linkname)] = s
print 'Number of topics read: ', c
print 'Number of structures to read/write: ', len(dic)
sys.stdout.flush()
print dic
if __name__ == '__main__':
for arg in wikipedia.handleArgs():
# - TODO: flag to switch from applet to scene backlinks to link backlinks
# - TODO: add option to search scene files
# - TODO: option to restrict number of topics read (c)
# if arg.startswith("-p:"):
# if (len(arg)) == len("-p:"):
# pred = u"refers to"
# else:
# pred = arg[len("-p:"):]
try:
main()
except:
print 'Something wrong.'
traceback.print_exc()
finally:
print 'Stop.'
wikipedia.stopme()
