This old version of Proteopedia is provided for student assignments while the new version is undergoing repairs. Content and edits done in this old version of Proteopedia after March 1, 2026 will eventually be lost when it is retired in about June of 2026.
Apply for new accounts at the new Proteopedia. Your logins will work in both the old and new versions.
User:PMBot/Code
From Proteopedia
Following is the preliminary source code. It reads topic page names from Proteopedia:Topic Pages and outputs a list of what would be written on the first pass. Only topic pages are read; no structures are changed.
# -*- coding: utf-8 -*-
from __future__ import with_statement # This isn't required in Python 2.6
"""
pmbot [OPTIONS]
Goes through all topic pages looking for the usage of non-uploaded
structure pages (official PDBs) in scenes. Each of these PDB pages
is edited such that it contains in the section named "About this
Structure" the string
<!-- PMBot Start -->
"The page TOPICPAGE refers to 1ABC."
or "The pages TOPICPAGES refer to 1ABC."
<!-- PMBot End -->
where TOPICPAGE is a topic page link, TOPICPAGES is a comma-separated
list of topic page links, and 1ABC is the name of the respective structure
page. If such a string exists, it is actualized.
Options:
At the moment, there are no options.
"""
#
# (C) R Stephan 2009
#
# Distributed under the terms of the GPL2.
#
__version__ = '0.10'
#
import wikipedia,re,sys,config
import catlib,traceback,itertools
# Minimum delay (seconds) between successive page reads, to stay polite
# to the Proteopedia server.
wikipedia.get_throttle.setDelay(5)
#wikipedia.put_throttle.setDelay(10)
# Edit summary used when the bot saves pages, keyed by language code.
msg={
'en': 'pmbot: maintenance of structure references',
}
def main():
Rco = re.compile (u'<!--(?:.(?<!--))*-->')
Rt1 = re.compile (u'(?<=\[\[)[^\]]+(?=\]\])')
Rt2 = re.compile (u' *\|.*')
Rst = re.compile (u'(?<=STRUCTURE_)[1-9][0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]')
Rap = re.compile (u'(?<=<applet load=.)[1-9][0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z](?=[\'\"])')
Rta = re.compile (u"Start of topic pages.*End of topic pages. Please DON'T REMOVE -->", re.DOTALL)
site = wikipedia.getSite()
dic = {}
# Try to read a topic pages list.
pagename = 'Proteopedia:Topic_Pages'
alltopics = wikipedia.Page (site, pagename)
try:
temp_text = alltopics.get (False, True)
except wikipedia.NoPage:
print 'NoPage exception when trying to read topic page list'
return
# with codecs.open('Topic_Pages.txt', encoding='utf-8') as f: temp_text = f.read()
m = Rta.search(temp_text)
if m == None:
print 'Topic list markers not found.'
return
alltopics_text = Rco.sub (u'', m.group(0))
topicsIter = Rt1.finditer (alltopics_text)
c = 0
for topicmatch in topicsIter:
# if c>2: break
c = c+1
t = topicmatch.group(0)
topicname = Rt2.sub (u'', t)
# TODO: check if already loaded before
loaded = False;
while not loaded:
sys.stdout.flush()
print 'Retrieving ' + topicname.encode ('ascii', 'xmlcharrefreplace')
sys.stdout.flush()
topic = wikipedia.Page (site, topicname)
try:
loaded = True
topic_text = topic.get()
except wikipedia.NoPage:
print 'NoPage exception when trying to read ' + topicname.encode ('ascii', 'xmlcharrefreplace')
loaded = False
break
except wikipedia.SectionError:
print 'Subject does not exist: ' + topicname.encode ('ascii', 'xmlcharrefreplace')
topicname = re.sub (ur"#.*", '', topicname)
loaded = False
continue
except wikipedia.IsRedirectPage, inst:
topicname = inst.args[0]
print 'Redirected to ' + topicname.encode ('ascii', 'xmlcharrefreplace')
loaded = False
continue
# print topic_text.encode('utf-8')
if not loaded: continue
links = itertools.chain (
Rt1.finditer (topic_text),
Rst.finditer (topic_text),
Rap.finditer (topic_text))
for linkmatch in links:
l = linkmatch.group(0)
linkname = Rt2.sub ('', l)
if linkname[0]>'0' and linkname[0]<='9':
if linkname in dic:
s = dic[string.lower(linkname)]
else:
s = set()
s.add (topicname.encode('ascii', 'xmlcharrefreplace'))
dic[string.lower(linkname)] = s
print 'Number of topics read: ', c
print 'Number of structures to read/write: ', len(dic)
sys.stdout.flush()
print dic
if __name__ == '__main__':
for arg in wikipedia.handleArgs():
# - TODO: flag to switch from applet to scene backlinks to link backlinks
# - TODO: add option to search scene files
# - TODO: option to restrict number of topics read (c)
# if arg.startswith("-p:"):
# if (len(arg)) == len("-p:"):
# pred = u"refers to"
# else:
# pred = arg[len("-p:"):]
try:
main()
except:
print 'Something wrong.'
traceback.print_exc()
finally:
print 'Stop.'
wikipedia.stopme()
