wusenet

Check-in [a208a798e8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Continue cooking up article loading/caching
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | master | trunk
Files: files | file ages | folders
SHA3-256:a208a798e807ae5713adea3c07749dd447ffef9527d4dd5da9c0df24b28711eb
User & Date: ajv-899-334-8894@vsta.org 2015-04-12 20:27:18
Context
2015-04-12
20:27
Continue cooking up article loading/caching Leaf check-in: a208a798e8 user: ajv-899-334-8894@vsta.org tags: master, trunk
2015-04-08
21:05
Start shedding some of the old TIS code. Start coding up HTML GET support for main group list and article display. Code up first pass at organizing articles by threading. check-in: 0974799a2a user: ajv-899-334-8894@vsta.org tags: master, trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Added article.py.

















































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#
# article.py
#	Handling of articles
#
# This module interacts with the NNTP module; NNTP communication
#  brings articles into existence, and their state is represented here.
# This module also deals with our design goal of minimizing the load
#  on the NNTP server by keeping state in the local filesystem, and
#  using that rather than making redundant accesses to NNTP.
#

# Number of article bodies cached
CACHED = 500

class Articles(object):

# Storage is organized:
#
# data/messages/base64(message-ID)
#	-> actual contents
# data/group.name/int-index
#	-> message-ID
#
# 

Changes to get.py.

261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318


319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407


408
409
410
411
412
413
414
415
416
417
418
419
420
421

422
423
424
425
426
	# Burst path
	pp = p.strip("/").split("/")

	# Top level
	if (not pp) or ((len(pp) == 1) and not pp[0]):
	    return self.send_top()

	# RESTful API
	if pp[0] == "rest":
	    return self.get_rest(pp[1:])

	# First path component is "state"?
	# /stateX [/slotY]
	tis = self.server
	if pp[0].startswith("state"):
	    # stateX
	    stid = pp[0][5:]

	    # /stateX.mp3?
	    isMP3 = stid.endswith(".mp3")
	    if isMP3:
		if len(pp) != 1:
		    # /stateX.mp3/more... wtf?
		    self.send_error(404, "File not found")
		    return None
		stid = stid[:-4]

	    # /stateX.text?
	    isTXT = stid.endswith(".text")
	    if isTXT:
		if len(pp) != 1:
		    # /stateX.text/more... wtf?
		    self.send_error(404, "File not found")
		    return None
		stid = stid[:-5]

	    # state<X>
	    if not stid.isdigit():
		self.send_error(404, "File not found")
		return None
	    idx = int(stid)
	    if (idx < 0) or (idx >= len(tis.states)):
		# But X is an illegal index
		self.send_error(404, "File not found")
		return None

	    # /stateX.mp3 -> send audio
	    if isMP3:
		return self.send_files("data/state%d.mp3" % (idx,))

	    # /stateX.text -> send rollup of State's text
	    if isTXT:
		return self.view_state_text(idx)

	    # /stateX
	    # Editing state itself
	    if len(pp) == 1:
		return self.send_state(idx)



	    # Editing Slot?
	    # /state/edit<slot-UUID>
	    state = tis.states[idx]
	    if pp[1].startswith("edit"):
		# Edit URL:
		#  edit<slot UUID>
		#
		# This kicks off editing of a Slot, cloning
		#  the content so all the editing can be completed
		#  before any of it is seen (i.e., atomicity).
		#
		sid = pp[1][4:]

		# Look up Slot UUID
		for sidx,slot in enumerate(state.slots):
		    if slot.uuid == sid:
			break
		else:
		    self.send_error(404, "File not found")
		    return None

		# Just looking at the Slot itself
		if len(pp) == 2:
		    # Kick off clone/edit of the Slot; implement TIS exclusion
		    #  so all of the filesystem ops are atomic.
		    with tis.exclusion:
			buf = self.clone_slot(sidx, slot)
		    return buf

	    # Malformed path, or trying to look at slot in
	    #  way not supported
	    self.send_error(404, "File not found")
	    return None

	# An /editX reference?
	# This is the cloned content of a Slot
	if pp[0].startswith("edit"):

	    # If all digits, we're working WRT an existing
	    #  edit.
	    sid = pp[0][4:]
	    if (not sid.isdigit()) or (len(pp) != 2):
		self.send_error(404, "File not found")
		return None

	    # Sanity check
	    editidx = int(sid)
	    if (editidx < 0) or (editidx >= len(tis.edits)):
		self.send_error(404, "File not found")
		return None

	    # Get tis.edits[], a copy of a Slot
	    slot = tis.edits[editidx]
	    if slot is None:
		self.send_error(404, "File not found")
		return None

	    # Dig up original Slot's index
	    state = slot.state
	    for sidx,_slot in enumerate(state.slots):
		if _slot.uuid == slot.uuid:
		    break
	    else:
		# Shouldn't happen
		self.send_error(404, "File not found")
		return None

	    # Referencing cloned Slot (most often this is
	    #  the redirect after cloning)
	    if pp[1] == "text":
		return self.edit_slot_text(editidx, sidx, slot)

	    # Cloned Slot's text edited, now going to record
	    #  audio.
	    if pp[1] == "audio":
		return self.edit_slot_audio(editidx, sidx, slot)

	    # Referencing cloned Slot item; usually playing the
	    #  mp3 audio, but others could be added...
	    if any(pp[1].endswith(suffix) for suffix in Litfiles):
		return self.send_edit_item(editidx, slot, pp[1])

	    # Malformed path
	    self.send_error(404, "File not found")
	    return None

	# /js, /imgs, /latest:
	# Subdirs with just literal content


	if pp[0] in ("js", "imgs", "latest"):
	    if any(pp[-1].endswith(suffix) for suffix in Litfiles):
		fname = os.path.join(*pp)
		return self.send_files(fname)

	# /html
	# Literal HTML source
	if pp[0] in ("html", ):
	    if any(pp[-1].endswith(suffix) for suffix in HTMLfiles):
		return self.send_files(os.path.join(*pp))

	# /lib; interface to Slot library
	if pp[0] == "lib":
	    return self.get_lib()


	# Bad path
	self.send_error(404, "File not found")
	return None








|
|
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

|
>
>

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<


>
>




<
<
<
<
<
<
<
<
<
<
>





261
262
263
264
265
266
267
268
269







270







































271
272
273
274
275






















































































276
277
278
279
280
281
282
283










284
285
286
287
288
289
	# Burst path
	pp = p.strip("/").split("/")

	# Top level
	if (not pp) or ((len(pp) == 1) and not pp[0]):
	    return self.send_top()

	# Newsgroup?
	if legal_gname(pp[0]):







	    gname = pp[0]







































	    if len(pp) == 1:
		return self.send_group(gname)
	    raise Exception, "TBD"
	    # Article access within group























































































	# /js, /imgs, /latest:
	# Subdirs with just literal content
	"""
	TBD
	if pp[0] in ("js", "imgs", "latest"):
	    if any(pp[-1].endswith(suffix) for suffix in Litfiles):
		fname = os.path.join(*pp)
		return self.send_files(fname)










	"""

	# Bad path
	self.send_error(404, "File not found")
	return None

Changes to nntp.py.

1
2
3
4
5
6
7
8



9
10




11
12
13
14
15
16
17









18
19
20
21
22
23
24







25
26
27
28
29


30
31
32
33
34
35
36
..
39
40
41
42
43
44
45
46
47
48
49
50

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

80
81
82
83
84
85
86
..
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#
# nntp.py
#	Server interface into an NNTP server
#
# Articles are cached in data/cache/<hash>, where <hash> is the
#  base64 encoding of a sha1() of the message ID (to avoid
#  filename issues).  It is left to a cron job to scrub that
#  directory periodically.



#
import time, threading, sha1, nntplib





# Don't ask the NNTP server about the same group at more frequent
#  intervals than this.
MINPOLL = 30

# Close our NNTP connection after this many minutes of idleness
IDLE = 2










# State for a single Usenet group
#
# nntp - Link to the NNTP instance using us
# name - Our Usenet group name
# when - Time when we last updated from the NNTP server
# first/last - Article indices range from NNTP server







class Group(object):
    def __init__(self, nntp, gname):
	self.nntp = nntp
	self.name = gname
	self.when = self.first = self.last = None



    # Update first/last if needed
    def poll(self):
	when = self.when
	if when is None:
	    needed = True
	else:
................................................................................

	# What we have is still good enough
	if not needed:
	    return

	# Serialize
	nntp = self.nntp
	nntp.lock()

	# Raced
	if group.when != when:
	    nntp.unlock()

	    return self.poll()

	# Establish NNTP server connection
	if not nntp.connect():
	    nntp.unlock()
	    return
	conn = nntp.conn

	# Get dope on group
	try:
	    resp, count, first, last, name = conn.group(nm)
	    ok = True
	except:
	    ok = False

	# If no network error, update the group
	if ok:
	    self.first = first
	    self.last = last
	    self.when = now

	nntp.unlock()

# All NNTP activity is wrapped up here
#
# server - NNTP server we connect to
# user/pass - Account on the server
# conn - nntplib.NNTP instance, while connected
# when - Time when self.conn last used

class NNTP(object):
    def __init__(self, server, user, pass):

	# Our NNTP account on the server
	self.server = server
	self.user = user
	self.pass = pass
................................................................................
	self.conn = None
	self.last_used = None

	# Keep track of when we last asked about a given group,
	#  and what we were told at that time
	self.groups = {}

	# When somebody wants us to do something, they kick this
	# When somebody wants to get new data for self.groups{}, they
	#  grab this and then go talk to nntplib.  Thus, we serialize
	#  on updates, while permitting web requests to be served
	#  immediately when the cached data suffices.
	self.sleeping = threading.Semaphore(1)

    # Return a Group instance for this named Usenet group
    # Mint one on first reference, and update it if it's more than
    #  MINPOLL minutes out of date.
    def group(self, gname):
	groups = self.groups





|
|
|
|
>
>
>


>
>
>
>







>
>
>
>
>
>
>
>
>







>
>
>
>
>
>
>





>
>







 







|

|
|
|
>
|

|
|
<
|
|

|
|
|
|
|
|

|
|
|
|
|
<
<







>







 







|
|
|
<
<
<







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
..
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

81
82
83
84
85
86
87
88
89
90
91
92
93
94
95


96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
...
114
115
116
117
118
119
120
121
122
123



124
125
126
127
128
129
130
#
# nntp.py
#	Server interface into an NNTP server
#
# The group and article state is stored in the local filesystem;
#  it is expected that a connection to the actual NNTP server is
#  not needed except when updating the state of groups and
#  when new article contents are retrieved.
# This NNTP instance also provides the mutual exclusion so that
#  only one thread at a time is accessing the NNTP server and
#  updating local filesystem state.
#
import time, threading, sha1, nntplib
from utils import CachedDict

# How many index->msgID mappings to cache per group
IDCACHED = 100

# Don't ask the NNTP server about the same group at more frequent
#  intervals than this.
MINPOLL = 30

# Close our NNTP connection after this many minutes of idleness
IDLE = 2

# Serialize NNTP and related cache directory modifications
class Exclusion(object):
    # Context manager wrapping a single threading.Lock.  Intended use:
    #
    #	with nntp.exclusion:
    #	    ...talk to the NNTP server / touch the cache directory...
    #
    # threading.Lock is not reentrant, so code already running inside
    #  the "with" block must not enter the same Exclusion again.
    def __init__(self):
        # mutex - the lock held for the duration of the "with" body
        self.mutex = threading.Lock()

    # Block until the lock is ours.  Returns self so that
    #  "with exclusion as ex:" binds something useful.
    def __enter__(self):
        self.mutex.acquire()
        return self

    # Release the lock on the way out, whether the body finished
    #  normally or raised.  Returning False lets any exception from
    #  the body propagate to the caller.
    def __exit__(self, typ, val, traceback):
        self.mutex.release()
        return False

# State for a single Usenet group
#
# nntp - Link to the NNTP instance using us
# name - Our Usenet group name
# when - Time when we last updated from the NNTP server
# first/last - Article indices range from NNTP server
# indices - Set of int's of article indices in the group
#	max(indices) == self.last, min(indices) == self.first
#
# This instance also deals with filesystem state for the
#  articles in the group.
# data/group.name/<int-index>
#	-> message-ID
class Group(object):
    def __init__(self, nntp, gname):
	self.nntp = nntp
	self.name = gname
	self.when = self.first = self.last = None
	self.indices = set()
	self.to_msgid = CachedDict(IDCACHED)

    # Update first/last if needed
    def poll(self):
	when = self.when
	if when is None:
	    needed = True
	else:
................................................................................

	# What we have is still good enough
	if not needed:
	    return

	# Serialize
	nntp = self.nntp
	with nntp.exclusion:

	    # We may have raced
	    when = self.when
	    needed = (now - when) > MINPOLL*60
	    if not needed:
		return

	    # Establish NNTP server connection
	    if not nntp.connect():

		return
	    conn = nntp.conn

	    # Get dope on group
	    try:
		resp, count, first, last, name = conn.group(nm)
		ok = True
	    except:
		ok = False

	    # If no network error, update the group
	    if ok:
		self.first = first
		self.last = last
		self.when = now



# All NNTP activity is wrapped up here
#
# server - NNTP server we connect to
# user/pass - Account on the server
# conn - nntplib.NNTP instance, while connected
# last_used - Time when self.conn was last used
# exclusion - Mutual exclusion, using the "with" construct
class NNTP(object):
    def __init__(self, server, user, pass):

	# Our NNTP account on the server
	self.server = server
	self.user = user
	self.pass = pass
................................................................................
	self.conn = None
	self.last_used = None

	# Keep track of when we last asked about a given group,
	#  and what we were told at that time
	self.groups = {}

	# When somebody wants us to do something, they come
	#  through this
	self.exclusion = Exclusion()




    # Return a Group instance for this named Usenet group
    # Mint one on first reference, and update it if it's more than
    #  MINPOLL minutes out of date.
    def group(self, gname):
	groups = self.groups

Added utils.py.

























































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#
# utils.py
#	Various utilities
#

# Sentinel meaning "caller supplied no default"; None stays usable as
#  a legitimate default value.
_missing = object()

# Doubly linked list of keys in a CachedDict
#
# prev/next - Neighbors in the LRU queue (None at either end)
# key/val - The cached mapping this node carries
class DLList(object):
    def __init__(self, k, v):
        self.prev = self.next = None
        self.key = k
        self.val = v

    # Remove ourselves from our place in a doubly
    #  linked list.  Only the neighbors are patched up; whoever
    #  owns the list's head/tail pointers must fix those itself.
    def remove(self):
        if self.prev is not None:
            self.prev.next = self.next
        if self.next is not None:
            self.next.prev = self.prev
        self.next = self.prev = None

# Caching dict, with maximum size & LRU replacement
#
# ncache - Maximum number of entries held
# hd/tl - Most/least recently used DLList node (None when empty)
# vals - key -> DLList node
class CachedDict(object):
    def __init__(self, ncache):
        assert ncache > 1
        self.ncache = ncache
        self.hd = self.tl = None
        self.vals = {}

    # Detach @node from the LRU queue, keeping hd/tl accurate
    def _unlink(self, node):
        if self.hd is node:
            self.hd = node.next
        if self.tl is node:
            self.tl = node.prev
        node.remove()

    # Make the (detached) @node the most recently used entry
    def _push_front(self, node):
        node.prev = None
        node.next = self.hd
        if self.hd is None:
            # First element
            self.tl = node
        else:
            self.hd.prev = node
        self.hd = node

    # Put value @v at key @k
    # The key becomes the most recently used entry.  If the key is
    #  new and the cache is full, the least recently used entry is
    #  dropped to make room.
    def __setitem__(self, k, v):
        node = self.vals.get(k)
        if node is not None:
            # Existing key: reuse its node, so the queue never holds
            #  two nodes for the same key.
            node.val = v
            self._unlink(node)
        else:
            if len(self.vals) >= self.ncache:
                dropped = self.tl
                self._unlink(dropped)
                del self.vals[dropped.key]
            node = DLList(k, v)
            self.vals[k] = node
        self._push_front(node)

    # Fetch
    # If it's in there, also move to front of LRU queue.
    # Raises KeyError for a missing key unless @default is given, in
    #  which case @default is returned.  Subscript syntax cannot pass
    #  @default; use get() for that.
    def __getitem__(self, k, default=_missing):
        node = self.vals.get(k)
        if node is None:
            if default is _missing:
                raise KeyError(k)
            return default

        # Already the most recent reference?  Nothing to reorder.
        if node is not self.hd:
            self._unlink(node)
            self._push_front(node)

        # Return our value
        return node.val

    # dict-style fetch: value at @k, or @default when absent.
    # A hit counts as a reference for LRU purposes.
    def get(self, k, default=None):
        return self.__getitem__(k, default)