import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import ResultEntry;
import SearchResultSet;
import eu.clarin.sru.server.SRUConstants;
import eu.clarin.sru.server.SRUDiagnostic;
import eu.clarin.sru.server.SRUDiagnosticList;
import eu.clarin.sru.server.SRUException;
import eu.clarin.sru.server.SRURequest;
import eu.clarin.sru.server.SRUSearchResultSet;
import eu.clarin.sru.server.SRUServerConfig;
import eu.clarin.sru.server.fcs.XMLStreamWriterHelper;

/**
 * {@link SRUSearchResultSet} implementation that iterates over the entries of a
 * {@link SearchResultSet} and writes each one as a resource / resource fragment
 * containing a single hits data view.
 *
 * <p>
 * The cursor starts before the first record; {@link #nextRecord()} must return
 * {@code true} before {@link #writeRecord(XMLStreamWriter)} is called.
 */
public class SAWSRUSearchResultSet extends SRUSearchResultSet {
    private static final Logger LOGGER = LogManager.getLogger(SAWSRUSearchResultSet.class);

    /** Shared factory for the parsers that re-read stored data view XML fragments. */
    protected static final SAXParserFactory factory;
    static {
        factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setValidating(false);
        factory.setXIncludeAware(false);
    }

    SRUServerConfig serverConfig = null;
    SRURequest request = null;

    // NOTE(review): the element type of the data view list is not visible from
    // this class, so the entries are kept as opaque objects.
    private final Set<Object> extraDataviews;
    private final SearchResultSet results;

    /** Index of the current record in {@code results.getResults()}; -1 before the first. */
    private int currentRecordCursor = -1;

    /**
     * Creates a result set over the given search results.
     *
     * @param serverConfig the server configuration (stored, not used by this class itself)
     * @param request      the SRU request being answered; may be {@code null}
     * @param diagnostics  the diagnostics list handed to the superclass
     * @param dataviews    additional data views; copied into a set, {@code null} is
     *                     treated as empty
     * @param results      the search results to serialize
     */
    protected SAWSRUSearchResultSet(SRUServerConfig serverConfig, SRURequest request,
            SRUDiagnosticList diagnostics, List<?> dataviews, SearchResultSet results) {
        super(diagnostics);
        this.serverConfig = serverConfig;
        this.request = request;
        this.results = results;
        this.extraDataviews = (dataviews == null) ? new HashSet<Object>() : new HashSet<Object>(dataviews);
    }

    /**
     * @return always {@code null}; records carry no identifier of their own
     */
    @Override
    public String getRecordIdentifier() {
        return null;
    }

    /**
     * @return the record schema named in the request, or
     *         {@code SAWSRUConstants.CLARIN_FCS_RECORD_SCHEMA} when there is no
     *         request or the request names none
     */
    @Override
    public String getRecordSchemaIdentifier() {
        // writeRecord() tolerates a null request, so this accessor does too.
        String requested = (request != null) ? request.getRecordSchemaIdentifier() : null;
        return (requested != null) ? requested : SAWSRUConstants.CLARIN_FCS_RECORD_SCHEMA;
    }

    /**
     * @return a "record not available in this schema" diagnostic when the
     *         effective record schema is not the CLARIN-FCS record schema,
     *         otherwise {@code null}
     */
    @Override
    public SRUDiagnostic getSurrogateDiagnostic() {
        final String schema = getRecordSchemaIdentifier();
        if (schema == null || SAWSRUConstants.CLARIN_FCS_RECORD_SCHEMA.equals(schema)) {
            return null;
        }
        return new SRUDiagnostic(SRUConstants.SRU_RECORD_NOT_AVAILABLE_IN_THIS_SCHEMA, schema,
                "Record is not available in record schema \"" + schema + "\".");
    }

    /**
     * @return the total reported by the underlying results, narrowed to {@code int}
     */
    @Override
    public int getTotalRecordCount() {
        return (int) results.getTotal();
    }

    /**
     * @return the number of result entries held by this result set
     */
    @Override
    public int getRecordCount() {
        return results.getResults().size();
    }

    /**
     * Advances the cursor to the next record.
     *
     * @return {@code true} if the cursor now points at a record, {@code false} if
     *         there are no more records (the cursor is left unchanged)
     */
    @Override
    public boolean nextRecord() throws SRUException {
        if (currentRecordCursor < (getRecordCount() - 1)) {
            currentRecordCursor++;
            return true;
        }
        return false;
    }

    /**
     * Writes the record under the cursor as one resource with one resource
     * fragment. The lex hits data view is written for requests of query type
     * {@code SAWSRUConstants.SRU_QUERY_TYPE_LEX}, the plain hits data view
     * otherwise.
     *
     * @param writer the writer to serialize to
     * @throws XMLStreamException if writing or re-parsing the stored data view fails
     */
    @Override
    public void writeRecord(XMLStreamWriter writer) throws XMLStreamException {
        ResultEntry result = results.getResults().get(currentRecordCursor);

        XMLStreamWriterHelper.writeStartResource(writer, results.getPid(), null);
        XMLStreamWriterHelper.writeStartResourceFragment(writer, result.lemma, result.landingpage);

        if (request != null && request.isQueryType(SAWSRUConstants.SRU_QUERY_TYPE_LEX)) {
            writeLexHitsDataview(writer, result);
        } else {
            writeHitsDataview(writer, result);
        }

        XMLStreamWriterHelper.writeEndResourceFragment(writer);
        XMLStreamWriterHelper.writeEndResource(writer);
    }

    /**
     * Writes a hits data view whose content is {@code result.dataview_hits}.
     *
     * @param writer the writer to serialize to
     * @param result the entry providing the stored data view XML
     * @throws XMLStreamException if writing or re-parsing the stored XML fails
     */
    protected void writeHitsDataview(XMLStreamWriter writer, ResultEntry result) throws XMLStreamException {
        writeHitsResult(writer, result.dataview_hits.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Writes a hits data view whose content is {@code result.dataview_lexhits}.
     *
     * @param writer the writer to serialize to
     * @param result the entry providing the stored data view XML
     * @throws XMLStreamException if writing or re-parsing the stored XML fails
     */
    protected void writeLexHitsDataview(XMLStreamWriter writer, ResultEntry result) throws XMLStreamException {
        writeHitsResult(writer, result.dataview_lexhits.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Writes the data view envelope and the namespaced {@code Result} element
     * around the given UTF-8 encoded XML fragment.
     */
    private static void writeHitsResult(XMLStreamWriter writer, byte[] fragmentUtf8) throws XMLStreamException {
        XMLStreamWriterHelper.writeStartDataView(writer, SAWSRUConstants.FCS_HITS_MIMETYPE);
        writer.setPrefix(SAWSRUConstants.FCS_HITS_PREFIX, SAWSRUConstants.FCS_HITS_NS);
        writer.writeStartElement(SAWSRUConstants.FCS_HITS_NS, "Result");
        writer.writeNamespace(SAWSRUConstants.FCS_HITS_PREFIX, SAWSRUConstants.FCS_HITS_NS);

        writeSolrHitsDataviewBytedXMLDoc(writer, fragmentUtf8);

        writer.writeEndElement(); // "Result" element
        XMLStreamWriterHelper.writeEndDataView(writer);
    }

    /**
     * Helper method for {@link #writeLexHitsDataview(XMLStreamWriter, ResultEntry)}
     * and {@link #writeHitsDataview(XMLStreamWriter, ResultEntry)} to write an XML
     * fragment to output. Elements named {@code Hit} are written in the hits
     * namespace.
     *
     * <p>
     * The fragment may have several top-level nodes, so it is wrapped in a
     * synthetic root element before parsing; that wrapper is never written to
     * {@code writer}. Whitespace-only text is dropped.
     *
     * @param writer the writer receiving the replayed events
     * @param bytes  the XML fragment, UTF-8 encoded
     * @throws XMLStreamException if the fragment cannot be parsed or the writer fails
     */
    protected static void writeSolrHitsDataviewBytedXMLDoc(XMLStreamWriter writer, byte[] bytes)
            throws XMLStreamException {
        final String marker = "writeSolrHitsDataviewBytedXMLDoc";
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream(bytes.length + 2 * marker.length() + 5);
            baos.write(("<" + marker + ">").getBytes(StandardCharsets.UTF_8));
            baos.write(bytes);
            // The wrapper must be closed again, otherwise the document is not well-formed.
            baos.write(("</" + marker + ">").getBytes(StandardCharsets.UTF_8));

            InputSource input = new InputSource(new ByteArrayInputStream(baos.toByteArray()));
            input.setEncoding(StandardCharsets.UTF_8.name());

            SAXParser parser = factory.newSAXParser();
            parser.parse(input, new HitsFragmentHandler(writer, marker));
        } catch (SAXException e) {
            // Writer failures are tunnelled through SAXException by the handler;
            // hand them back unchanged instead of wrapping them a second time.
            if (e.getCause() instanceof XMLStreamException) {
                throw (XMLStreamException) e.getCause();
            }
            throw new XMLStreamException(e);
        } catch (ParserConfigurationException | IOException e) {
            throw new XMLStreamException(e);
        }
    }

    /**
     * SAX handler that replays the parsed fragment onto an {@link XMLStreamWriter},
     * skipping the synthetic wrapper element and whitespace-only text.
     */
    private static final class HitsFragmentHandler extends DefaultHandler {
        private final XMLStreamWriter writer;
        private final String marker;

        /** Prefix mappings reported since the last start tag; declared on the next one. */
        private final Map<String, String> prefixes = new HashMap<>();

        HitsFragmentHandler(XMLStreamWriter writer, String marker) {
            this.writer = writer;
            this.marker = marker;
        }

        /** Returns {@code true} if the range is empty or contains only whitespace. */
        private static boolean isBlank(char[] ch, int start, int length) {
            for (int i = start; i < start + length; i++) {
                if (!Character.isWhitespace(ch[i])) {
                    return false;
                }
            }
            return true;
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // strip blanks
            // TODO: maybe with indent == 0, just check for single line-breaks after element ends?
            if (isBlank(ch, start, length)) {
                return;
            }
            try {
                writer.writeCharacters(ch, start, length);
            } catch (XMLStreamException e) {
                throw new SAXException(e);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (qName.equals(marker)) {
                return;
            }
            try {
                writer.writeEndElement();
            } catch (XMLStreamException e) {
                throw new SAXException(e);
            }
        }

        @Override
        public void startPrefixMapping(String prefix, String uri) throws SAXException {
            super.startPrefixMapping(prefix, uri);
            // Namespace declarations can only be written after the start tag,
            // so remember them until startElement() runs.
            prefixes.put(prefix, uri);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if (qName.equals(marker)) {
                return;
            }
            try {
                if (qName.equals("Hit")) {
                    writer.writeStartElement(SAWSRUConstants.FCS_HITS_NS, qName);
                } else {
                    writer.writeStartElement(qName);
                }

                if (!prefixes.isEmpty()) {
                    for (Map.Entry<String, String> entry : prefixes.entrySet()) {
                        writer.writeNamespace(entry.getKey(), entry.getValue());
                    }
                    prefixes.clear();
                }

                for (int i = 0; i < attributes.getLength(); i++) {
                    writer.writeAttribute(attributes.getQName(i), attributes.getValue(i));
                }
            } catch (XMLStreamException e) {
                throw new SAXException(e);
            }
        }
    }
}