Examples

The examples subdirectory contains examples (based on the simple C++ examples) showing how to use the Python bindings: simpleindex.py, simplesearch.py, simpleexpand.py. There’s also simplematchdecider.py which shows how to define a MatchDecider in Python.

simplesearch.py

 1#!/usr/bin/env python
 2#
 3# Simple command-line search script.
 4#
 5# Copyright (C) 2003 James Aylett
 6# Copyright (C) 2004,2007,2009,2013 Olly Betts
 7#
 8# This program is free software; you can redistribute it and/or
 9# modify it under the terms of the GNU General Public License as
10# published by the Free Software Foundation; either version 2 of the
11# License, or (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, see
20# <https://www.gnu.org/licenses/>.
21
22import sys
23import xapian
24
25# We require at least two command line arguments.
26if len(sys.argv) < 3:
27    print("Usage: %s PATH_TO_DATABASE QUERY" % sys.argv[0], file=sys.stderr)
28    sys.exit(1)
29
30try:
31    # Open the database for searching.
32    database = xapian.Database(sys.argv[1])
33
34    # Start an enquire session.
35    enquire = xapian.Enquire(database)
36
37    # Combine the rest of the command line arguments with spaces between
38    # them, so that simple queries don't have to be quoted at the shell
39    # level.
40    query_string = str.join(' ', sys.argv[2:])
41
42    # Parse the query string to produce a Xapian::Query object.
43    qp = xapian.QueryParser()
44    stemmer = xapian.Stem("english")
45    qp.set_stemmer(stemmer)
46    qp.set_database(database)
47    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
48    query = qp.parse_query(query_string)
49    print("Parsed query is: %s" % str(query))
50
51    # Find the top 10 results for the query.
52    enquire.set_query(query)
53    matches = enquire.get_mset(0, 10)
54
55    # Display the results.
56    print("%i results found." % matches.get_matches_estimated())
57    print("Results 1-%i:" % matches.size())
58
59    for m in matches:
60        print("%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data().decode('utf-8')))
61
62except Exception as e:
63    print("Exception: %s" % str(e), file=sys.stderr)
64    sys.exit(1)

simpleindex.py

 1#!/usr/bin/env python
 2#
 3# Index each paragraph of a text file as a Xapian document.
 4#
 5# Copyright (C) 2003 James Aylett
 6# Copyright (C) 2004,2007,2013,2014 Olly Betts
 7#
 8# This program is free software; you can redistribute it and/or
 9# modify it under the terms of the GNU General Public License as
10# published by the Free Software Foundation; either version 2 of the
11# License, or (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, see
20# <https://www.gnu.org/licenses/>.
21
22import sys
23import xapian
24import string
25
26if len(sys.argv) != 2:
27    print("Usage: %s PATH_TO_DATABASE" % sys.argv[0], file=sys.stderr)
28    sys.exit(1)
29
30try:
31    # Open the database for update, creating a new database if necessary.
32    database = xapian.WritableDatabase(sys.argv[1], xapian.DB_CREATE_OR_OPEN)
33
34    indexer = xapian.TermGenerator()
35    stemmer = xapian.Stem("english")
36    indexer.set_stemmer(stemmer)
37
38    para = ''
39    try:
40        for line in sys.stdin:
41            line = line.strip()
42            if line == '':
43                if para != '':
44                    # We've reached the end of a paragraph, so index it.
45                    doc = xapian.Document()
46                    doc.set_data(para)
47
48                    indexer.set_document(doc)
49                    indexer.index_text(para)
50
51                    # Add the document to the database.
52                    database.add_document(doc)
53                    para = ''
54            else:
55                if para != '':
56                    para += ' '
57                para += line
58    except StopIteration:
59        pass
60
61except Exception as e:
62    print("Exception: %s" % str(e), file=sys.stderr)
63    sys.exit(1)

simpleexpand.py

 1#!/usr/bin/env python
 2#
 3# Simple example script demonstrating query expansion.
 4#
 5# Copyright (C) 2003 James Aylett
 6# Copyright (C) 2004,2006,2007,2012,2013,2014 Olly Betts
 7#
 8# This program is free software; you can redistribute it and/or
 9# modify it under the terms of the GNU General Public License as
10# published by the Free Software Foundation; either version 2 of the
11# License, or (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, see
20# <https://www.gnu.org/licenses/>.
21
22import sys
23import xapian
24
25# We require at least two command line arguments.
26if len(sys.argv) < 3:
27    print("Usage: %s PATH_TO_DATABASE QUERY [-- [DOCID...]]" % sys.argv[0], file=sys.stderr)
28    sys.exit(1)
29
30try:
31    # Open the database for searching.
32    database = xapian.Database(sys.argv[1])
33
34    # Start an enquire session.
35    enquire = xapian.Enquire(database)
36
37    # Combine command line arguments up to "--" with spaces between
38    # them, so that simple queries don't have to be quoted at the shell
39    # level.
40    query_string = sys.argv[2]
41    index = 3
42    while index < len(sys.argv):
43        arg = sys.argv[index]
44        index += 1
45        if arg == '--':
46            # Passed marker, move to parsing relevant docids.
47            break
48        query_string += ' '
49        query_string += arg
50
51    # Create an RSet with the listed docids in.
52    reldocs = xapian.RSet()
53    for index in range(index, len(sys.argv)):
54        reldocs.add_document(int(sys.argv[index]))
55
56    # Parse the query string to produce a Xapian::Query object.
57    qp = xapian.QueryParser()
58    stemmer = xapian.Stem("english")
59    qp.set_stemmer(stemmer)
60    qp.set_database(database)
61    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
62    query = qp.parse_query(query_string)
63
64    if not query.empty():
65        print("Parsed query is: %s" % str(query))
66
67        # Find the top 10 results for the query.
68        enquire.set_query(query)
69        matches = enquire.get_mset(0, 10, reldocs)
70
71        # Display the results.
72        print("%i results found." % matches.get_matches_estimated())
73        print("Results 1-%i:" % matches.size())
74
75        for m in matches:
76            print("%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data()))
77
78    # Put the top 5 (at most) docs into the rset if rset is empty
79    if reldocs.empty():
80        rel_count = 0
81        for m in matches:
82            reldocs.add_document(m.docid)
83            rel_count += 1
84            if rel_count == 5:
85                break
86
87    # Get the suggested expand terms
88    eterms = enquire.get_eset(10, reldocs)
89    print("%i suggested additional terms" % eterms.size())
90    for k in eterms:
91        print("%s: %f" % (k.term, k.weight))
92
93except Exception as e:
94    print("Exception: %s" % str(e), file=sys.stderr)
95    sys.exit(1)

simplematchdecider.py

 1#!/usr/bin/env python
 2#
 3# Simple command-line match decider example
 4#
 5# Copyright (C) 2003 James Aylett
 6# Copyright (C) 2004,2007,2009,2013 Olly Betts
 7#
 8# This program is free software; you can redistribute it and/or
 9# modify it under the terms of the GNU General Public License as
10# published by the Free Software Foundation; either version 2 of the
11# License, or (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, see
20# <https://www.gnu.org/licenses/>.
21
22import sys
23import xapian
24
25# This example runs a query like simplesearch does, but uses a MatchDecider
26# (mymatchdecider) to discard any document for which value 0 is equal to
27# the string passed as the second command line argument.
28
29if len(sys.argv) < 4:
30    print("Usage: %s PATH_TO_DATABASE AVOID_VALUE QUERY" % sys.argv[0], file=sys.stderr)
31    sys.exit(1)
32
33class mymatchdecider(xapian.MatchDecider):
34    def __init__(self, avoidvalue):
35        xapian.MatchDecider.__init__(self)
36        self.avoidvalue = avoidvalue
37
38    def __call__(self, doc):
39        return doc.get_value(0) != self.avoidvalue
40
41try:
42    # Open the database for searching.
43    database = xapian.Database(sys.argv[1])
44
45    # Start an enquire session.
46    enquire = xapian.Enquire(database)
47
48    # Combine the rest of the command line arguments with spaces between
49    # them, so that simple queries don't have to be quoted at the shell
50    # level.
51    avoid_value = sys.argv[2]
52    query_string = str.join(' ', sys.argv[3:])
53
54    # Parse the query string to produce a Xapian::Query object.
55    qp = xapian.QueryParser()
56    stemmer = xapian.Stem("english")
57    qp.set_stemmer(stemmer)
58    qp.set_database(database)
59    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
60    query = qp.parse_query(query_string)
61    print("Parsed query is: %s" % str(query))
62
63    # Find the top 10 results for the query.
64    enquire.set_query(query)
65    mdecider = mymatchdecider(avoid_value)
66    matches = enquire.get_mset(0, 10, None, mdecider)
67
68    # Display the results.
69    print("%i results found." % matches.get_matches_estimated())
70    print("Results 1-%i:" % matches.size())
71
72    for m in matches:
73        print("%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data()))
74
75except Exception as e:
76    print("Exception: %s" % str(e), file=sys.stderr)
77    sys.exit(1)