Skip to content

Instantly share code, notes, and snippets.

@bloomonkey
Created October 24, 2012 11:36
Show Gist options
  • Select an option

  • Save bloomonkey/3945599 to your computer and use it in GitHub Desktop.

Select an option

Save bloomonkey/3945599 to your computer and use it in GitHub Desktop.

Revisions

  1. bloomonkey revised this gist Oct 24, 2012. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions subjectURIs.py
    Original file line number Diff line number Diff line change
    @@ -67,15 +67,18 @@ def get_outputStream(options):


    def printSubjectURIs(options, args):
    # Put the subject into a query string
    if options.components:
    qString = 'dc.subject exact "{0}"'.format(' '.join(args[1:]))
    else:
    qString = 'dc.subject exact "{0}" and ead.istoplevel=1'.format(' '.join(args[1:]))
    # Parse the query CQL into a Query object
    try:
    q = qf.get_query(session, qString)
    except:
    session.logger.log_error(session, "Invalid CQL: {0}".format(qString))
    return 1
    # Search the database with the Query
    resultSet = db.search(session, q)
    unitidWf = db.get_object(session, 'unitidIdentifierWorkflow')
    # Get the output stream (usually file or stdout)
  2. bloomonkey revised this gist Oct 24, 2012. 1 changed file with 9 additions and 3 deletions.
    12 changes: 9 additions & 3 deletions subjectURIs.py
    Original file line number Diff line number Diff line change
    @@ -76,13 +76,19 @@ def printSubjectURIs(options, args):
    except:
    session.logger.log_error(session, "Invalid CQL: {0}".format(qString))
    return 1
    rs = db.search(session, q)
    resultSet = db.search(session, q)
    unitidWf = db.get_object(session, 'unitidIdentifierWorkflow')
    # Get the output stream (usually file or stdout)
    with get_outputStream(options) as outStream:
    for rsi in rs:
    rec = rsi.fetch_record(session, schema='ead')
    # Iterate through all results in resultSet
    for result in resultSet:
    # Fetch the Record in EAD schema
    rec = result.fetch_record(session, schema='ead')
    # Use a Workflow to return the <unitid> based identifier
    new_id = unitidWf.process(session, rec)
    # Assemble the URI
    uri = "{0}/{1}".format(baseDataUri, new_id)
    # Write the URI to the output stream
    outStream.write(uri + '\n')
    return 0

  3. bloomonkey revised this gist Oct 24, 2012. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion subjectURIs.py
    Original file line number Diff line number Diff line change
    @@ -83,7 +83,7 @@ def printSubjectURIs(options, args):
    rec = rsi.fetch_record(session, schema='ead')
    new_id = unitidWf.process(session, rec)
    uri = "{0}/{1}".format(baseDataUri, new_id)
    outStream.write(uri)
    outStream.write(uri + '\n')
    return 0


  4. bloomonkey revised this gist Oct 24, 2012. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions subjectURIs.py
    Original file line number Diff line number Diff line change
    @@ -68,9 +68,9 @@ def get_outputStream(options):

    def printSubjectURIs(options, args):
    if options.components:
    qString = "dc.subject exact {0}".format(' '.join(args[1:]))
    qString = 'dc.subject exact "{0}"'.format(' '.join(args[1:]))
    else:
    qString = "dc.subject exact {0} and ead.istoplevel=1".format(' '.join(args[1:]))
    qString = 'dc.subject exact "{0}" and ead.istoplevel=1'.format(' '.join(args[1:]))
    try:
    q = qf.get_query(session, qString)
    except:
  5. bloomonkey revised this gist Oct 24, 2012. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion subjectURIs.py
    Original file line number Diff line number Diff line change
    @@ -46,7 +46,7 @@ def __init__(self, **kwargs):
    self.add_option(
    "-c",
    "--components",
    dest="component",
    dest="components",
    action="store_true",
    default=False,
    help="Include component URIs"
  6. bloomonkey revised this gist Oct 24, 2012. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion subjectURIs.py
    Original file line number Diff line number Diff line change
    @@ -7,6 +7,7 @@
    import sys
    import os

    from contextlib import contextmanager
    from optparse import OptionParser

    cheshirePath = os.environ.get('C3HOME', os.path.expanduser('~/cheshire3'))
    @@ -99,4 +100,4 @@ def printSubjectURIs(options, args):

    if __name__ == '__main__':
    (options, args) = option_parser.parse_args(sys.argv)
    sys.exit(printSubjectURIs(options, args))
    sys.exit(printSubjectURIs(options, args))
  7. bloomonkey created this gist Oct 24, 2012.
    102 changes: 102 additions & 0 deletions subjectURIs.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,102 @@
    """Generate Archives Hub URIs for records matching a given subject.
    Usage: %prog [options] subject
    """

    import os
    import sys

    from contextlib import contextmanager
    from optparse import OptionParser

    # Root of the Cheshire3 installation: $C3HOME when set, else ~/cheshire3
    _defaultCheshirePath = os.path.expanduser('~/cheshire3')
    cheshirePath = os.environ.get('C3HOME', _defaultCheshirePath)
    # Hack sys.path (position 1) to look for over-ridden cheshire3 modules
    _overrideCodePath = os.path.join(cheshirePath, 'cheshire3', 'code')
    sys.path.insert(1, _overrideCodePath)

    # Cheshire3 imports
    from cheshire3.baseObjects import Session
    from cheshire3.server import SimpleServer

    # Build environment: a Session plus a server loaded from serverConfig.xml
    # found under <cheshirePath>/cheshire3/configs/
    _serverConfigPath = os.path.join(
        cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml'
    )
    session = Session()
    serv = SimpleServer(session, _serverConfigPath)


    class MyOptionParser(OptionParser):
        """Custom option parser for outputting list of record URIs.

        Registers two options on top of the standard ``OptionParser``:

        ``-o`` / ``--output`` OUTFILE
            Stored as ``options.outfile``; ``None`` when omitted.
        ``-c`` / ``--components``
            Boolean flag stored as ``options.components``; ``False`` when
            omitted.
        """

        def __init__(self, **kwargs):
            """Create the parser and register the script's options.

            All keyword arguments are passed straight to ``OptionParser``.
            """
            OptionParser.__init__(self, **kwargs)
            # Options
            self.add_option(
                "-o",
                "--output",
                dest="outfile",
                default=None,
                help=" ".join(["Write output to OUTFILE.",
                               "If omitted output will be printed to STDOUT."]),
                metavar="OUTFILE"
            )
            self.add_option(
                "-c",
                "--components",
                # Must be "components" (plural): printSubjectURIs reads
                # options.components, so any other dest raises AttributeError.
                dest="components",
                action="store_true",
                default=False,
                help="Include component URIs"
            )


    @contextmanager
    def get_outputStream(options):
        """Context manager for appropriate output stream based on given options.

        Args:
            options: object with an ``outfile`` attribute. When it is ``None``
                ``sys.stdout`` is yielded; otherwise it is treated as a path
                and opened for writing (``'w'``, truncating existing content).

        Yields:
            A writable file-like object.

        A file opened here is closed when the ``with`` block exits, including
        when it exits through an exception. ``sys.stdout`` is never closed.
        """
        if options.outfile is None:
            yield sys.stdout
            return
        # ``with`` guarantees the file is closed however the caller's block ends
        with open(os.path.abspath(options.outfile), 'w') as f:
            yield f


    def printSubjectURIs(options, args):
        """Write one URI per record matching the given subject.

        Args:
            options: parsed command-line options. ``options.components``
                decides whether the ``ead.istoplevel=1`` clause is left out of
                the query; the whole object is also handed to
                ``get_outputStream`` to choose where output goes.
            args: positional arguments. ``args[0]`` is skipped (the caller
                passes ``sys.argv``); the remaining items are joined with
                single spaces to form the subject.

        Returns:
            0 on success, 1 when the query string could not be parsed (the
            failure is logged through ``session.logger``).
        """
        # Put the subject into a query string. The term is double-quoted so
        # that a multi-word subject is treated as a single CQL term.
        subject = ' '.join(args[1:])
        qString = 'dc.subject exact "{0}"'.format(subject)
        if not options.components:
            qString += ' and ead.istoplevel=1'
        # Parse the query CQL into a Query object
        try:
            q = qf.get_query(session, qString)
        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must propagate
            session.logger.log_error(session, "Invalid CQL: {0}".format(qString))
            return 1
        # Search the database with the Query
        resultSet = db.search(session, q)
        unitidWf = db.get_object(session, 'unitidIdentifierWorkflow')
        # Get the output stream (usually file or stdout)
        with get_outputStream(options) as outStream:
            # Iterate through all results in resultSet
            for result in resultSet:
                # Fetch the Record in EAD schema
                rec = result.fetch_record(session, schema='ead')
                # Use a Workflow to return the identifier for this record
                new_id = unitidWf.process(session, rec)
                # Assemble the URI
                uri = "{0}/{1}".format(baseDataUri, new_id)
                # One URI per line; without the newline they run together
                outStream.write(uri + '\n')
        return 0


    # Base URI under which every record URI is assembled
    hubBaseUrl = 'http://archiveshub.ac.uk'
    apacheDataUrl = '/data'
    baseDataUri = '{0}{1}'.format(hubBaseUrl, apacheDataUrl)

    # Fetch the 'db_hub' database from the server and point the session at it
    db = serv.get_object(session, 'db_hub')
    session.database = db.id
    session.logger = db.get_path(session, 'defaultLogger')
    # Query factory used to turn CQL strings into Query objects
    qf = db.get_object(session, 'defaultQueryFactory')

    option_parser = MyOptionParser()


    if __name__ == '__main__':
        # sys.argv is passed whole, so args[0] is the program name
        options, args = option_parser.parse_args(sys.argv)
        sys.exit(printSubjectURIs(options, args))