wusenet

Check-in [a208a798e8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Continue cooking up article loading/caching
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | master | trunk
Files: files | file ages | folders
SHA3-256:a208a798e807ae5713adea3c07749dd447ffef9527d4dd5da9c0df24b28711eb
User & Date: ajv-899-334-8894@vsta.org 2015-04-12 20:27:18
Context
2015-04-12
20:27
Continue cooking up article loading/caching Leaf check-in: a208a798e8 user: ajv-899-334-8894@vsta.org tags: master, trunk
2015-04-08
21:05
Start shedding some of the old TIS code. Start coding up HTML GET support for main group list and article display. Code up first pass at organizing articles by threading. check-in: 0974799a2a user: ajv-899-334-8894@vsta.org tags: master, trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Added article.py.

            1  +#
            2  +# article.py
            3  +#	Handling of articles
            4  +#
            5  +# This module interacts with the NNTP module; NNTP communication
            6  +#  brings articles into existence, which state is represented here.
            7  +# This module also deals with our design goal of minimizing the load
            8  +#  on the NNTP server by keeping state in the local filesystem, and
            9  +#  using that rather than making redundant accesses to NNTP.
           10  +#
           11  +
           12  +# Number of article bodies cached
           13  +CACHED = 500
           14  +
           15  +class Articles(object):
           16  +
           17  +# Storage is organized:
           18  +#
           19  +# data/messages/base64(message-ID)
           20  +#	-> actual contents
           21  +# data/group.name/int-index
           22  +#	-> message-ID
           23  +#
           24  +# 

Changes to get.py.

   261    261   	# Burst path
   262    262   	pp = p.strip("/").split("/")
   263    263   
   264    264   	# Top level
   265    265   	if (not pp) or ((len(pp) == 1) and not pp[0]):
   266    266   	    return self.send_top()
   267    267   
   268         -	# RESTful API
   269         -	if pp[0] == "rest":
   270         -	    return self.get_rest(pp[1:])
   271         -
   272         -	# First path component is "state"?
   273         -	# /stateX [/slotY]
   274         -	tis = self.server
   275         -	if pp[0].startswith("state"):
   276         -	    # stateX
   277         -	    stid = pp[0][5:]
   278         -
   279         -	    # /stateX.mp3?
   280         -	    isMP3 = stid.endswith(".mp3")
   281         -	    if isMP3:
   282         -		if len(pp) != 1:
   283         -		    # /stateX.mp3/more... wtf?
   284         -		    self.send_error(404, "File not found")
   285         -		    return None
   286         -		stid = stid[:-4]
   287         -
   288         -	    # /stateX.text?
   289         -	    isTXT = stid.endswith(".text")
   290         -	    if isTXT:
   291         -		if len(pp) != 1:
   292         -		    # /stateX.text/more... wtf?
   293         -		    self.send_error(404, "File not found")
   294         -		    return None
   295         -		stid = stid[:-5]
   296         -
   297         -	    # state<X>
   298         -	    if not stid.isdigit():
   299         -		self.send_error(404, "File not found")
   300         -		return None
   301         -	    idx = int(stid)
   302         -	    if (idx < 0) or (idx >= len(tis.states)):
   303         -		# But X is an illegal index
   304         -		self.send_error(404, "File not found")
   305         -		return None
   306         -
   307         -	    # /stateX.mp3 -> send audio
   308         -	    if isMP3:
   309         -		return self.send_files("data/state%d.mp3" % (idx,))
   310         -
   311         -	    # /stateX.text -> send rollup of State's text
   312         -	    if isTXT:
   313         -		return self.view_state_text(idx)
   314         -
   315         -	    # /stateX
   316         -	    # Editing state itself
          268  +	# Newsgroup?
          269  +	if legal_gname(pp[0]):
          270  +	    gname = pp[0]
   317    271   	    if len(pp) == 1:
   318         -		return self.send_state(idx)
   319         -
   320         -	    # Editing Slot?
   321         -	    # /state/edit<slot-UUID>
   322         -	    state = tis.states[idx]
   323         -	    if pp[1].startswith("edit"):
   324         -		# Edit URL:
   325         -		#  edit<slot UUID>
   326         -		#
   327         -		# This kicks off editing of a Slot, cloning
   328         -		#  the content so all the editing can be completed
   329         -		#  before any of it is seen (i.e., atomicity).
   330         -		#
   331         -		sid = pp[1][4:]
   332         -
   333         -		# Look up Slot UUID
   334         -		for sidx,slot in enumerate(state.slots):
   335         -		    if slot.uuid == sid:
   336         -			break
   337         -		else:
   338         -		    self.send_error(404, "File not found")
   339         -		    return None
   340         -
   341         -		# Just looking at the Slot itself
   342         -		if len(pp) == 2:
   343         -		    # Kick off clone/edit of the Slot; implement TIS exclusion
   344         -		    #  so all of the filesystem ops are atomic.
   345         -		    with tis.exclusion:
   346         -			buf = self.clone_slot(sidx, slot)
   347         -		    return buf
   348         -
   349         -	    # Malformed path, or trying to look at slot in
   350         -	    #  way not supported
   351         -	    self.send_error(404, "File not found")
   352         -	    return None
   353         -
   354         -	# An /editX reference?
   355         -	# This is the cloned content of a Slot
   356         -	if pp[0].startswith("edit"):
   357         -
   358         -	    # If all digits, we're working WRT an existing
   359         -	    #  edit.
   360         -	    sid = pp[0][4:]
   361         -	    if (not sid.isdigit()) or (len(pp) != 2):
   362         -		self.send_error(404, "File not found")
   363         -		return None
   364         -
   365         -	    # Sanity check
   366         -	    editidx = int(sid)
   367         -	    if (editidx < 0) or (editidx >= len(tis.edits)):
   368         -		self.send_error(404, "File not found")
   369         -		return None
   370         -
   371         -	    # Get tis.edits[], a copy of a Slot
   372         -	    slot = tis.edits[editidx]
   373         -	    if slot is None:
   374         -		self.send_error(404, "File not found")
   375         -		return None
   376         -
   377         -	    # Dig up original Slot's index
   378         -	    state = slot.state
   379         -	    for sidx,_slot in enumerate(state.slots):
   380         -		if _slot.uuid == slot.uuid:
   381         -		    break
   382         -	    else:
   383         -		# Shouldn't happen
   384         -		self.send_error(404, "File not found")
   385         -		return None
   386         -
   387         -	    # Referencing cloned Slot (most often this is
   388         -	    #  the redirect after cloning)
   389         -	    if pp[1] == "text":
   390         -		return self.edit_slot_text(editidx, sidx, slot)
   391         -
   392         -	    # Cloned Slot's text edited, now going to record
   393         -	    #  audio.
   394         -	    if pp[1] == "audio":
   395         -		return self.edit_slot_audio(editidx, sidx, slot)
   396         -
   397         -	    # Referencing cloned Slot item; usually playing the
   398         -	    #  mp3 audio, but others could be added...
   399         -	    if any(pp[1].endswith(suffix) for suffix in Litfiles):
   400         -		return self.send_edit_item(editidx, slot, pp[1])
   401         -
   402         -	    # Malformed path
   403         -	    self.send_error(404, "File not found")
   404         -	    return None
          272  +		return self.send_group(gname)
          273  +	    raise Exception, "TBD"
          274  +	    # Article access within group
   405    275   
   406    276   	# /js, /imgs, /latest:
   407    277   	# Subdirs with just literal content
          278  +	"""
          279  +	TBD
   408    280   	if pp[0] in ("js", "imgs", "latest"):
   409    281   	    if any(pp[-1].endswith(suffix) for suffix in Litfiles):
   410    282   		fname = os.path.join(*pp)
   411    283   		return self.send_files(fname)
   412         -
   413         -	# /html
   414         -	# Literal HTML source
   415         -	if pp[0] in ("html", ):
   416         -	    if any(pp[-1].endswith(suffix) for suffix in HTMLfiles):
   417         -		return self.send_files(os.path.join(*pp))
   418         -
   419         -	# /lib; interface to Slot library
   420         -	if pp[0] == "lib":
   421         -	    return self.get_lib()
          284  +	"""
   422    285   
   423    286   	# Bad path
   424    287   	self.send_error(404, "File not found")
   425    288   	return None
   426    289   

Changes to nntp.py.

     1      1   #
     2      2   # nntp.py
     3      3   #	Server interface into an NNTP server
     4      4   #
     5         -# Articles are cached in data/cache/<hash>, where <hash> is the
     6         -#  base64 encoding of a sha1() of the message ID (to avoid
     7         -#  filename issues).  It is left to a cron job to scrub that
     8         -#  directory periodically.
            5  +# The group and article state is stored in the local filesystem;
            6  +#  it is expected that a connection to the actual NNTP server is
            7  +#  not needed except when updating the state of groups and
            8  +#  when new article contents are retrieved.
            9  +# This NNTP instance also provides the mutual exclusion so that
           10  +#  only one thread at a time is accessing the NNTP server and
           11  +#  updating local filesystem state.
     9     12   #
    10     13   import time, threading, sha1, nntplib
           14  +from utils import CachedDict
           15  +
           16  +# How many index->msgID mappings to cache per group
           17  +IDCACHED = 100
    11     18   
    12     19   # Don't ask the NNTP server about the same group at more frequent
    13     20   #  intervals than this.
    14     21   MINPOLL = 30
    15     22   
    16     23   # Close our NNTP connection after this many minutes of idleness
    17     24   IDLE = 2
           25  +
           26  +# Serialize NNTP and related cache directory modifications
           27  +class Exclusion(object):
           28  +    def __init__(self):
           29  +	self.mutex = threading.Lock()
           30  +    def __enter__(self):
           31  +	self.mutex.acquire()
           32  +    def __exit__(self, typ, val, traceback):
           33  +	self.mutex.release()
    18     34   
    19     35   # State for a single Usenet group
    20     36   #
    21     37   # nntp - Link to the NNTP instance using us
    22     38   # name - Our Usenet group name
    23     39   # when - Time when we last updated from the NNTP server
    24     40   # first/last - Article indices range from NNTP server
           41  +# indices - Set of int's of article indices in the group
           42  +#	max(indices) == self.last, min(indices) == self.first
           43  +#
           44  +# This instance also deals with filesystem state for the
           45  +#  articles in the group.
           46  +# data/group.name/<int-index>
           47  +#	-> message-ID
    25     48   class Group(object):
    26     49       def __init__(self, nntp, gname):
    27     50   	self.nntp = nntp
    28     51   	self.name = gname
    29     52   	self.when = self.first = self.last = None
           53  +	self.indices = set()
           54  +	self.to_msgid = CachedDict(IDCACHED)
    30     55   
    31     56       # Update first/last if needed
    32     57       def poll(self):
    33     58   	when = self.when
    34     59   	if when is None:
    35     60   	    needed = True
    36     61   	else:
................................................................................
    39     64   
    40     65   	# What we have is still good enough
    41     66   	if not needed:
    42     67   	    return
    43     68   
    44     69   	# Serialize
    45     70   	nntp = self.nntp
    46         -	nntp.lock()
    47         -
    48         -	# Raced
    49         -	if group.when != when:
    50         -	    nntp.unlock()
    51         -	    return self.poll()
    52         -
    53         -	# Establish NNTP server connection
    54         -	if not nntp.connect():
    55         -	    nntp.unlock()
    56         -	    return
    57         -	conn = nntp.conn
    58         -
    59         -	# Get dope on group
    60         -	try:
    61         -	    resp, count, first, last, name = conn.group(nm)
    62         -	    ok = True
    63         -	except:
    64         -	    ok = False
    65         -
    66         -	# If no network error, update the group
    67         -	if ok:
    68         -	    self.first = first
    69         -	    self.last = last
    70         -	    self.when = now
    71         -
    72         -	nntp.unlock()
           71  +	with nntp.exclusion:
           72  +
           73  +	    # We may have raced
           74  +	    when = self.when
           75  +	    needed = (now - when) > MINPOLL*60
           76  +	    if not needed:
           77  +		return
           78  +
           79  +	    # Establish NNTP server connection
           80  +	    if not nntp.connect():
           81  +		return
           82  +	    conn = nntp.conn
           83  +
           84  +	    # Get dope on group
           85  +	    try:
           86  +		resp, count, first, last, name = conn.group(nm)
           87  +		ok = True
           88  +	    except:
           89  +		ok = False
           90  +
           91  +	    # If no network error, update the group
           92  +	    if ok:
           93  +		self.first = first
           94  +		self.last = last
           95  +		self.when = now
    73     96   
    74     97   # All NNTP activity is wrapped up here
    75     98   #
    76     99   # server - NNTP server we connect to
    77    100   # user/pass - Account on the server
    78    101   # conn - nntplib.NNTP instance, while connected
    79    102   # when - Time when self.conn last used
          103  +# exclusion - Mutual exclusion, using the "with" construct
    80    104   class NNTP(object):
    81    105       def __init__(self, server, user, pass):
    82    106   
    83    107   	# Our NNTP account on the server
    84    108   	self.server = server
    85    109   	self.user = user
    86    110   	self.pass = pass
................................................................................
    90    114   	self.conn = None
    91    115   	self.last_used = None
    92    116   
    93    117   	# Keep track of when we last asked about a given group,
    94    118   	#  and what we were told at that time
    95    119   	self.groups = {}
    96    120   
    97         -	# When somebody wants us to do something, they kick this
    98         -	# When somebody wants to get new data for self.groups{}, they
    99         -	#  grab this and then go talk to nntplib.  Thus, we serialize
   100         -	#  on updates, while permitting web requests to be served
   101         -	#  immediately when the cached data suffices.
   102         -	self.sleeping = threading.Semaphore(1)
          121  +	# When somebody wants us to do something, they come
          122  +	#  through this
          123  +	self.exclusion = Exclusion()
   103    124   
   104    125       # Return a Group instance for this named Usenet group
   105    126       # Mint one on first reference, and update it if it's more than
   106    127       #  MINPOLL minutes out of date.
   107    128       def group(self, gname):
   108    129   	groups = self.groups
   109    130   

Added utils.py.

            1  +#
            2  +# utils.py
            3  +#	Various utilities
            4  +#
            5  +
            6  +_missing = object()
            7  +
            8  +# Doubly linked list of keys in a CachedDict
            9  +class DLList(object):
           10  +    def __init__(self, k, v):
           11  +	self.prev = self.next = None
           12  +	self.key = k
           13  +	self.val = v
           14  +
           15  +    # Remove ourselves from our place in a doubly
           16  +    #  linked list
           17  +    def remove(self):
           18  +	if self.prev is not None:
           19  +	    self.prev.next = self.next
           20  +	if self.next is not None:
           21  +	    self.next.prev = self.prev
           22  +	self.next = self.prev = None
           23  +
           24  +# Caching dict, with maximum size & LRU replacement
           25  +class CachedDict(object):
           26  +    def __init__(self, ncache):
           27  +	assert ncache > 1
           28  +	self.ncache = ncache
           29  +	self.hd = self.tl = None
           30  +	self.vals = {}
           31  +
           32  +    # Put value @v at key @k
           33  +    def __setitem__(self, k, v):
           34  +	if k not in self.vals:
           35  +	    if len(self.vals) == self.ncache:
           36  +		dropped = self.tl
           37  +		self.tl = dropped.prev
           38  +		self.tl.next = None
           39  +		del self.vals[dropped.key]
           40  +
           41  +	# Actual k/v registration
           42  +	added = DLList(k, v)
           43  +	self.vals[k] = added
           44  +
           45  +	# Linked list of all members of self.vals{}
           46  +	if self.hd is None:
           47  +	    # First element
           48  +	    self.hd = self.tl = added
           49  +	else:
           50  +	    # Put at head of list; most recent reference
           51  +	    self.hd.prev = added
           52  +	    added.next = self.hd
           53  +	    self.hd = added
           54  +
           55  +    # Fetch
           56  +    # If it's in there, also move to front of LRU queue
           57  +    def __getitem__(self, k, default=_missing):
           58  +	if k not in self.vals:
           59  +	    if default is _missing:
           60  +		raise Exception, "Missing key"
           61  +	    return default
           62  +
           63  +	# Get our matching element, and remove from its
           64  +	#  old place in the LRU queue
           65  +	matched = self.vals[k]
           66  +	matched.remove()
           67  +
           68  +	# Put at front
           69  +	hd = self.hd
           70  +	matched.next = hd
           71  +	hd.prev = matched
           72  +	self.hd = matched
           73  +
           74  +	# Return our value
           75  +	return matched.val
           76  +