Newsgroups: comp.sources.unix From: etaoin@uscyber.com (Eric Fischer) Subject: v29i073: grolier - read articles from Grolier's Multimedia Encyclopedia, Part01/01 Message-id: <1.821492753.7765@gw.home.vix.com> Sender: unix-sources-moderator@gw.home.vix.com Approved: vixie@gw.home.vix.com Submitted-By: etaoin@uscyber.com (Eric Fischer) Posting-Number: Volume 29, Issue 73 Archive-Name: grolier/part01 #!/bin/sh # This is a shell archive (produced by GNU sharutils 4.1). # To extract the files from this archive, save it to some FILE, remove # everything before the `!/bin/sh' line above, then type `sh FILE'. # # Made on 1996-01-06 22:18 CST by . # Source directory was `/home/eric'. # # Existing files will *not* be overwritten unless `-c' is specified. # # This shar contains: # length mode name # ------ ---------- ------------------------------------------ # 1033 -rw-r--r-- grolier/Makefile # 2566 -rw-r--r-- grolier/README # 2659 -rw-r--r-- grolier/grolier.1 # 9256 -rw-r--r-- grolier/grolier.c # 2047 -rw-r--r-- grolier/indexcd.c # # ============= grolier/Makefile ============== if test ! 
-d 'grolier'; then echo 'x - creating directory grolier' mkdir 'grolier' fi if test -f 'grolier/Makefile' && test X"$1" != X"-c"; then echo 'x - skipping grolier/Makefile (file already exists)' else echo 'x - extracting grolier/Makefile (text)' sed 's/^X//' << 'SHAR_EOF' > 'grolier/Makefile' && X# Makefile for grolier X# X# Eric Fischer, etaoin@wis.com / etaoin@uchicago.edu X X# your favorite C compiler X Xcc = cc X X# path to the encyclopedia database on the CD-ROM X Xdb = /GME96/Databases/CPL_Database/GME96.SRC X X# where files go when installed X Xprefix = /usr/local X Xlibdir = $(prefix)/lib Xinstalldir = $(prefix)/bin Xmandir = $(prefix)/man/man1 X X##################################################################### X Xall: grolier.index grolier X Xinstall: all X cp grolier $(installdir)/grolier X chmod 755 $(installdir)/grolier X cp grolier.index $(libdir)/grolier.index X chmod 644 $(libdir)/grolier.index X cp grolier.1 $(mandir)/grolier.1 X chmod 644 $(mandir)/grolier.1 X Xgrolier.index: indexcd $(db) X ./indexcd | tr '[A-Z]' '[a-z]' | sort | uniq > grolier.index X Xindexcd: indexcd.o X $(cc) -o indexcd indexcd.o X Xgrolier: grolier.o X $(cc) -o grolier grolier.o X X.c.o: X $(cc) -c -DDB=\"$(db)\" -DLIBDIR=\"$(libdir)\" $< X Xclean: X -rm grolier.index indexcd.o indexcd grolier.o grolier X Xshar: clean X cd ..; shar -m -F grolier/* > grolier.shar SHAR_EOF chmod 0644 'grolier/Makefile' || echo 'restore of grolier/Makefile failed' shar_count="`wc -c < 'grolier/Makefile'`" test 1033 -eq "$shar_count" || echo "grolier/Makefile: original size 1033, current size $shar_count" fi # ============= grolier/README ============== if test -f 'grolier/README' && test X"$1" != X"-c"; then echo 'x - skipping grolier/README (file already exists)' else echo 'x - extracting grolier/README (text)' sed 's/^X//' << 'SHAR_EOF' > 'grolier/README' && Xgrolier a program to retrieve articles from Xby Eric Fischer Grolier's Multimedia Encyclopedia X XJanuary 6, 1996 version X X XAn 
encyclopedia on CD-ROM is a good idea, but to be useful Xit has to be easy to look things up. Grolier didn't realize Xthis and produced a Macintosh client program that occupies Xhuge amounts of memory, has a single, non-resizeable window, Xand takes forever to do searches. "A brand-new, easy-to- Xuse interface," they call it. Yeah, right. X XFortunately, they made the mistake of including the actual Xtext of the encyclopedia in plain ASCII form with a formatting Xlanguage that's pretty easy to decode. So I wrote this program Xto do the decoding. X X XThis program was written to read the 1996 edition of Grolier's Xfrom the Macintosh-format CD. I haven't seen the MS-DOS CD Xor the edition from any other year, so this is probably the Xonly version of the CD that it'll work with. X X XThe CD, unfortunately, has a Macintosh file system, not a Rock XRidge one, so if your computer can't mount Macintosh disks Xyou'll need to borrow a Mac long enough to copy the encyclopedia Xdatabase to a local disk. Once the CD is accessible somehow, Xyou need to edit the Makefile variable $(db) to reflect the Xpath to the encyclopedia database. On my system the CD Xautomounts at /GME96, but if you put it somewhere else, Xmake sure you let the Makefile know. X XAfter that, you should just be able to do X X make X make install X Xbut be aware that "make" will take a long time because it has Xto index the entire encyclopedia. X XThere must be an index of some kind on the CD, but if there is Xit's not in as friendly a format as the text. So make compiles X"indexcd", which reads through the text of the encyclopedia, Xpulling out the article titles and putting them in the index Xfile "grolier.index". This will eat about 1.4 megs of disk, Xbut is a lot faster than searching the entire text every time! 
X XOnce you've made the index, indexcd is no longer needed and X"make install" doesn't install it -- it just copies the X"grolier" program itself, the "grolier.index" index file, Xand the "grolier.1" manual page. X X XThese programs haven't been tested on anything other than Xa Nextstation running Nextstep 3.0. I don't think I've done Xanything terribly unportable, but sometimes it's hard to tell. X X XI can currently be reached at any of the following addresses, Xthough some may eventually cease to work: X X etaoin@uchicago.edu / etaoin@wis.com / etaoin@uscyber.com X 4725 Sheboygan Ave #232, Madison WI 53705 X 5759 Guilford Ave, Indianapolis IN 46220 SHAR_EOF chmod 0644 'grolier/README' || echo 'restore of grolier/README failed' shar_count="`wc -c < 'grolier/README'`" test 2566 -eq "$shar_count" || echo "grolier/README: original size 2566, current size $shar_count" fi # ============= grolier/grolier.1 ============== if test -f 'grolier/grolier.1' && test X"$1" != X"-c"; then echo 'x - skipping grolier/grolier.1 (file already exists)' else echo 'x - extracting grolier/grolier.1 (text)' sed 's/^X//' << 'SHAR_EOF' > 'grolier/grolier.1' && X.TH GROLIER 1 "January 6, 1996" X.SH NAME Xgrolier \- read articles from Grolier's Encyclopedia X.SH SYNOPSIS X.B grolier X[ X.B \-trl X] X.I topic X.SH OPTIONS X.TP X.B \-t Xrather than formatting the text for screen display, include Xthe appropriate X.BR troff (1) Xformatting commands to typeset the articles. X.TP X.B \-r Xdon't format the text at all; just get it straight off the CD. X.TP X.B \-l Xdon't even retrieve the articles; just list their titles. X.SH DESCRIPTION X.B grolier Xlooks up the X.I topic Xof your choice on the 1996 Grolier's Multimedia Encyclopedia CD-ROM. XThere can be several words to the X.I topic, Xin which case only those articles whose titles match X.I all Xthe X.IR topic s Xare printed. 
X.LP XFor instance, X.LP X grolier chicago X.LP Xretrieves all the articles whose title includes the Xword ``Chicago'', while X.LP X grolier university chicago X.LP Xretrieves only those articles whose title includes Xboth of these words. X.LP XBy default, the output is formatted for on-screen reading; Xboldface and underlining use the backspacing Xconvention that is understood by X.BR less (1) Xand by many printers. XIf you specified X.B \-t Xyou instead get X.BR troff (1) Xsource code for the X.B \-ms Xmacro package. XIf you want to see the text as it appears on the CD, without Xmy code messing with it, X.B \-r Xwill return the article in Grolier's proprietary format. XIf you use X.B \-l Xthe articles aren't retrieved at all; only their titles Xare listed. X.LP XSo, for instance, if you wanted to see a list of all the articles Xwhose titles included ``Sturgeon'', you'd do X.LP X grolier -l sturgeon X.LP XTo read the article about Theodore Sturgeon, you could do X.LP X grolier theodore sturgeon | less X.LP Xand then to print out a nice typeset copy of it, X.LP X grolier -t theodore sturgeon | troff -ms X.SH HINTS X.B grolier Xsearches the index for the first word of the X.I topic, Xthen gets the titles for all those articles and compares them Xagainst the other words. So put the least common word Xas the first topic and you'll get a much faster search. X.SH FILES X.TP 2i X/usr/local/lib/grolier.index Xa list of all the article titles and their byte offset into the Xencylopedia file X.TP X/\s-2GME\s096/Databases/\s-2CPL\s0_Database/\s-2GME\s096.\s-2SRC\s0 Xthe actual text of the encyclopedia X.SH BUGS XThe index format means you can only search for full words, Xwhich is sometimes annoying. It also only indexes the Xfirst six letters of each word, so longer words cause Xfalse positives that slow the search. X.LP XMany of the entries are for pictures or sounds that you can't Xsee in their full glory with this text-based retriever. 
X.SH AUTHOR XEric Fischer, etaoin@{uchicago.edu, wis.com, uscyber.com} SHAR_EOF chmod 0644 'grolier/grolier.1' || echo 'restore of grolier/grolier.1 failed' shar_count="`wc -c < 'grolier/grolier.1'`" test 2659 -eq "$shar_count" || echo "grolier/grolier.1: original size 2659, current size $shar_count" fi # ============= grolier/grolier.c ============== if test -f 'grolier/grolier.c' && test X"$1" != X"-c"; then echo 'x - skipping grolier/grolier.c (file already exists)' else echo 'x - extracting grolier/grolier.c (text)' sed 's/^X//' << 'SHAR_EOF' > 'grolier/grolier.c' && X#include X#include X#include X#include X#include X#include X X/* grolier -- X retrieve articles from Grolier's Multimedia Encyclopedia X X by Eric Fischer, etaoin@{wis.com,uscyber.com,uchicago.edu} X */ X X#define LEN 600 X#define SIGNIF 6 X#define RECLEN (8 + SIGNIF + 1) X Xtypedef enum { X internal, nroffsrc, raw, list X} how; X Xtypedef enum { X roman, italic, bold X} style; X Xchar *program; Xhow fmthow = internal; Xint screenwid = 70; X Xint Xwordwid (s) X char *s; X{ X int w = 0; X X while (*s && *s != '_' && *s != '-' && !isspace (*s)) { X s++, w++; X } X X return w; X} X Xvoid Xdie (s) X char *s; X{ X perror (s); X exit (1); X} X Xvoid Xformat (db) X FILE *db; X{ X char y[LEN]; X style thestyle = roman; X int hpos = 0; X X while (fgets (y, LEN, db) && strncmp (y, "-S-", 3)) { X int l = strlen (y) - 1; X X /* chop off CRLF */ X X while (l && (y[l] == '\r' || y[l] == '\n')) { X y[l] = 0; X l--; X } X X if (fmthow == raw) { X printf ("%s\n", y); X } else { X char *t = y; X X while (*t) { X while (!strncmp (t, "_^", 2)) { X if (!strncmp (t, "_^i_", 5) || X !strncmp (t, "_^>I_", 5)) { X X /* end italic */ X X if (fmthow == nroffsrc) printf ("\\fR"); X else thestyle = roman; X X t += 5; X } else if (!strncmp (t, "_^p_", 5)) { X X /* end section header */ X X t += 5; X } else if (!strncmp (t, "_^a_", 5)) { X X /* end link */ X X if (fmthow == nroffsrc) printf ("\\fR"); X else thestyle = roman; X 
X t += 5; X } else if (!strncmp (t, "_^b_", 5) || X !strncmp (t, "_^>B_", 5)) { X X /* end boldface */ X X if (fmthow == nroffsrc) printf ("\\fR"); X else thestyle = roman; X X t += 5; X } else break; X } X X if (*t) { X if (fmthow == nroffsrc) { X putchar (*t); X } else { X X /* break the line if this word won't fit */ X X if (wordwid (t) + hpos > screenwid) { X printf ("\n"); X hpos = 0; X } X X /* fake bold with overstriking; X fake underlining overstriking an underline X */ X X if (thestyle == bold) { X printf ("%c\b", *t); X } else if (thestyle == italic) { X printf ("_\b"); X } X X putchar (*t); X hpos++; X } X X t++; X } X } X } X } X X printf ("\n"); X} X Xvoid Xsearch (nstrs, strs, db) X int nstrs; X char **strs; X FILE *db; X{ X static char *index = 0; X FILE *ix; X struct stat buf; X long nrec; X long bottom, top, here; X char keystr[SIGNIF + 1]; X int foundone = 0; X int i; X char try[LEN]; X X /* if this is the first time through, figure out the full name X of the index file. X */ X X if (index == 0) { X index = malloc (strlen (LIBDIR) + strlen ("/grolier.index") + 1); X if (index == 0) { X perror ("malloc!"); X exit (1); X } X X strcpy (index, LIBDIR); X strcat (index, "/grolier.index"); X } X X /* first look for an index in our directory, then in libdir... */ X X if (stat ("grolier.index", &buf)) { X if (stat (index, &buf)) { X perror (index); X exit (1); X } X } X X /* and get the its number of records by dividing the length by X the length of a record X */ X X nrec = buf.st_size / RECLEN; X X /* now we try to read the file; again try first here then in X libdir... 
X */ X X ix = fopen ("grolier.index", "r"); X X if (ix == 0) { X ix = fopen (index, "r"); X X if (ix == 0) { X perror (index); X exit (1); X } X } X X /* copy the first key to use as a key for binary search X and pad it to be SIGNIF chars long X */ X X for (i = 0; strs[0][i] && i < SIGNIF; i++) { X keystr[i] = strs[0][i]; X } X for (; i < SIGNIF; i++) { X keystr[i] = ' '; X } X keystr[SIGNIF] = 0; X X /* now we start a binary search... */ X X bottom = 0; X top = nrec - 1; X here = top / 2; X X /* keep searching for a match ... */ X X while (top >= bottom) { X int a; X X /* get record number HERE */ X X if (fseek (ix, here * RECLEN, SEEK_SET)) die ("index seek"); X if (!fgets (try, LEN, ix)) die ("index get"); X X /* compare our key with what's actually at that record */ X X a = strncmp (keystr, try, SIGNIF); X X if (a < 0) { /* we're too late in the file */ X top = here - 1; X here = (top + bottom) / 2; X } else if (a > 0) { /* we're too early */ X bottom = here + 1; X here = (top + bottom) / 2; X } else { /* just right! */ X foundone = 1; X break; X } X } X X if (!foundone) { X fprintf (stderr, "no matches for %s\n", *strs); X return; X } X X /* now seek backwards for the first of them. */ X X while (here > 0) { X if (fseek (ix, (here - 1) * RECLEN, SEEK_SET)) die ("index seek"); X if (!fgets (try, LEN, ix)) die ("index get"); X X /* if that key doesn't match ours, we went too far */ X X if (strncmp (keystr, try, SIGNIF) != 0) { X break; X } X X here--; X } X X /* now find the ones whose subjects match all the keys */ X X while (here < nrec) { X long off; X char dup[LEN]; X char *x; X int i; X X /* go back (forward, actually) to the last one where it X actually did match X */ X X if (fseek (ix, here * RECLEN, SEEK_SET)) die ("index seek"); X X /* get a line, make sure we haven't gone past end of file */ X X if (!fgets (try, LEN, ix)) break; X X /* have we gone past the keys for this subject? 
*/ X X if (strncmp (try, keystr, SIGNIF)) break; X X /* the offset into the database is found just after X the key name, so grab that out of the index, and X scoot the database file to there. X */ X X off = atol (try + SIGNIF); X if (fseek (db, off, SEEK_SET)) die ("database seek"); X X /* skip until we get to the article title (the line X after "-A-") X */ X X while (fgets (try, LEN, db) && strncmp (try, "-A-", 3)) { X ; X } X X /* grab the title, complain if the CD died */ X X if (!fgets (try, LEN, db)) die ("database get"); X X /* strip off the CRLF at the end */ X X for (x = try; *x; x++) { X if (*x == '\r') *x = 0; X } X X /* make a copy that we'll convert to lowercase */ X X strcpy (dup, try); X X for (x = dup; *x; x++) { X *x = tolower (*x); X } X X /* and see if all the search keys appear in that X lowercased title. X */ X X for (i = 0; i < nstrs; i++) { X if (!strstr (dup, strs[i])) { X i = 0; X break; X } X } X X /* if they all did, then print the title and article */ X X if (i != 0) { X if (fmthow == list) { X printf ("%s\n", try); X } else { X if (fmthow == raw) { X printf ("\n---- %s ----\n\n", try); X } else if (fmthow == nroffsrc) { X printf (".TL\n%s\n.LP\n", try); X } else { X printf ("\n---- %s ----\n\n", try); X } X X /* article text starts after "-T-" */ X X while (fgets (try, LEN, db) && strncmp (try, "-T-", 3)) ; X X format (db); X } X } X X here++; X } X X fclose (ix); X} X Xvoid Xusage() X{ X fprintf (stderr, "\nUsage: %s [-trl] topic ...\n\n", program); X fprintf (stderr, " -t outputs troff source for printing\n"); X fprintf (stderr, " -r sends text straight off the CD\n"); X fprintf (stderr, " -l just list matching article titles\n\n"); X} X Xint Xmain (argc, argv) X int argc; X char **argv; X{ X int i; X FILE *db; X char *x; X X program = *argv; X X while (argc > 1 && argv[1][0] == '-') { X char *x = argv[1] + 1; X X while (*x) { X switch (*x) { X case 't': X fmthow = nroffsrc; X break; X case 'r': X fmthow = raw; X break; X case 'l': X 
fmthow = list; X break; X default: X usage(); X exit (1); X } X x++; X } X X argc--, argv++; X } X X if (argc < 2) { X usage(); X exit (1); X } X X#ifdef TIOCGSIZE X { X struct ttysize win; X X if (!ioctl (2, TIOCGSIZE, &win) && win.ts_cols) { X screenwid = win.ts_cols - 10; X } X } X#endif X#ifdef TIOCGWINSZ X { X struct winsize win; X X if (!ioctl (2, TIOCGWINSZ, &win) && win.ws_col) { X screenwid = win.ws_col - 10; X } X } X#endif X X db = fopen (DB, "r"); X if (db == 0) { X perror (DB); X exit (1); X } X X for (x = argv[1]; *x; x++) { X if (! (isalpha (*x) || isdigit (*x))) { X fprintf (stderr, "%s: only letters and numbers are indexed, ", X program); X fprintf (stderr, "so the %c in %s\n", *x, argv[1]); X fprintf (stderr, "is going to keep you from finding anything\n"); X exit (1); X } X } X X for (i = 1; i < argc; i++) { X for (x = argv[i]; *x; x++) { X *x = tolower (*x); X } X } X X search (argc - 1, argv + 1, db); X fclose (db); X X return 0; X} SHAR_EOF chmod 0644 'grolier/grolier.c' || echo 'restore of grolier/grolier.c failed' shar_count="`wc -c < 'grolier/grolier.c'`" test 9256 -eq "$shar_count" || echo "grolier/grolier.c: original size 9256, current size $shar_count" fi # ============= grolier/indexcd.c ============== if test -f 'grolier/indexcd.c' && test X"$1" != X"-c"; then echo 'x - skipping grolier/indexcd.c (file already exists)' else echo 'x - extracting grolier/indexcd.c (text)' sed 's/^X//' << 'SHAR_EOF' > 'grolier/indexcd.c' && X#include X#include X#include X X/* indexcd -- X make an index of Grolier's Multimedia Encyclopedia so the X "grolier" program can look things up quickly. 
X
X   by Eric Fischer, etaoin@{wis.com,uscyber.com,uchicago.edu}
X */
X
X#define BUF 600         /* size of the line buffer */
X#define EVERY 50        /* echo one title in this many as a progress report */
X#define SIGNIF 6        /* characters of each word kept in the index */
X
X/* main -- read the database named by DB and write one index record to
X   stdout for every word of every article title.
X
X   An article starts at a line beginning "-C-"; its title is the line
X   after the next line beginning "-A-"; the article is skipped up to
X   the next line beginning "-END-".  A record is the word truncated or
X   space-padded to SIGNIF characters, the byte offset of the "-C-" line
X   as eight decimal digits, and a newline.  Every EVERYth title is
X   echoed on stderr.
X
X   Returns 0, or exits with status 1 if the database can't be opened.
X */
X
Xint
Xmain (int argc, char **argv)
X{
X    char s[BUF];
X    FILE *f;
X    long off;
X    int inc = 0;
X
X    (void) argc;
X
X    f = fopen (DB, "r");
X    if (f == 0) {
X        /* newline added so the message doesn't run into the prompt */
X        fprintf (stderr, "%s: can't open %s\n", argv[0], DB);
X        exit (1);
X    }
X
X    fprintf (stderr, "(listing every %dth subject)\n", EVERY);
X
X    while (fgets (s, BUF, f)) {
X
X        /* each article starts with a "-C-" line */
X
X        if (strncmp (s, "-C-", 3) != 0) {
X            continue;
X        }
X
X        /* the line has just been consumed, so back up over it to get
X           the offset at which the article begins
X         */
X
X        off = ftell (f) - (long) strlen (s);
X
X        /* skip to the title, "-A-" */
X
X        while (fgets (s, BUF, f) && strncmp (s, "-A-", 3)) {
X            ;
X        }
X
X        if (fgets (s, BUF, f)) {
X            size_t len = strlen (s);
X            char *a = s;
X
X            /* strip the CRLF; bounded by len so that a title line
X               holding nothing else can't walk off the front of s
X             */
X
X            while (len > 0 && (s[len - 1] == '\n' || s[len - 1] == '\r')) {
X                s[--len] = 0;
X            }
X
X            /* skip to the first word (<ctype.h> functions need an
X               unsigned char value)
X             */
X
X            while (*a && !isalnum ((unsigned char) *a)) a++;
X
X            /* make an index entry for each word in the title */
X
X            while (*a) {
X                int l = 0;
X                int i;
X
X                /* find the length of this word */
X
X                while (isalnum ((unsigned char) a[l])) l++;
X
X                /* print at most SIGNIF chars of it */
X
X                for (i = 0; i < l && i < SIGNIF; i++) {
X                    putchar (a[i]);
X                }
X
X                /* then pad it out to SIGNIF if it was shorter */
X
X                for (; i < SIGNIF; i++) {
X                    putchar (' ');
X                }
X
X                /* and print the offset into the database */
X
X                printf ("%8.8ld\n", off);
X
X                /* then find the next word. */
X
X                a += l;
X                while (*a && !isalnum ((unsigned char) *a)) a++;
X            }
X
X            inc++;
X            if (inc == EVERY) {
X                fprintf (stderr, "%s\n", s);
X                inc = 0;
X            }
X        }
X
X        /* and then find the next article... */
X
X        while (fgets (s, BUF, f) && strncmp (s, "-END-", 5)) {
X            ;
X        }
X    }
X
X    return 0;
X}
SHAR_EOF
  chmod 0644 'grolier/indexcd.c' ||
  echo 'restore of grolier/indexcd.c failed'
  shar_count="`wc -c < 'grolier/indexcd.c'`"
  # NOTE: 2047 is the byte count shar recorded for the file as originally
  # posted.  The payload above has been edited since, so a size-mismatch
  # message from the next test is expected and harmless.
  test 2047 -eq "$shar_count" ||
    echo "grolier/indexcd.c: original size 2047, current size $shar_count"
fi
exit 0