OpenTREP Logo  0.07.11
C++ Open Travel Request Parsing Library
Result.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <algorithm>
8 // Boost
9 #include <boost/tokenizer.hpp>
10 // OpenTREP
11 #include <opentrep/LocationKey.hpp>
13 #include <opentrep/bom/Filter.hpp>
17 #include <opentrep/bom/Place.hpp>
18 #include <opentrep/bom/Result.hpp>
21 
22 namespace OPENTREP {
23 
24  // //////////////////////////////////////////////////////////////////////
25  Result::Result (const TravelQuery_T& iQueryString,
26  const Xapian::Database& iDatabase)
27  : _resultHolder (NULL), _database (iDatabase),
28  _queryString (iQueryString), _hasFullTextMatched (false),
29  _bestDocData (RawDataString_T ("")) {
30  init();
31  }
32 
33  // //////////////////////////////////////////////////////////////////////
34  Result::~Result() {
35  }
36 
37  // //////////////////////////////////////////////////////////////////////
38  void Result::init() {
39  }
40 
41  // //////////////////////////////////////////////////////////////////////
42  std::string Result::describeShortKey() const {
43  std::ostringstream oStr;
44  oStr << _queryString;
45  return oStr.str();
46  }
47 
48  // //////////////////////////////////////////////////////////////////////
49  std::string Result::describeKey() const {
50  std::ostringstream oStr;
51  oStr << "'" << describeShortKey() << "' ";
52  if (_correctedQueryString.empty() == false
53  && _correctedQueryString != _queryString) {
54  oStr << "(corrected into '" << _correctedQueryString
55  << "' with an edit distance/error of " << _editDistance
56  << " over an allowable distance of " << _allowableEditDistance
57  << ") - ";
58  } else {
59  oStr << "- ";
60  }
61  return oStr.str();
62  }
63 
64  // //////////////////////////////////////////////////////////////////////
65  std::string Result::toString() const {
66  std::ostringstream oStr;
67  oStr << describeKey();
68 
69  if (_documentList.empty() == true) {
70  oStr << "No match" << std::endl;
71  return oStr.str();
72  }
73  assert (_hasFullTextMatched == true);
74 
75  unsigned short idx = 0;
76  for (DocumentList_T::const_iterator itDoc = _documentList.begin();
77  itDoc != _documentList.end(); ++itDoc, ++idx) {
78  const XapianDocumentPair_T& lDocumentPair = *itDoc;
79 
80  const Xapian::Document& lXapianDoc = lDocumentPair.first;
81  const Xapian::docid& lDocID = lXapianDoc.get_docid();
82 
83  const ScoreBoard& lScoreBoard = lDocumentPair.second;
84 
85  if (idx != 0) {
86  oStr << ", ";
87  }
88  oStr << "Doc ID: " << lDocID << ", matching with ("
89  << lScoreBoard.describe() << "), containing: '"
90  << lXapianDoc.get_data() << "'";
91  }
92 
93  return oStr.str();
94  }
95 
96  // //////////////////////////////////////////////////////////////////////
97  void Result::toStream (std::ostream& ioOut) const {
98  ioOut << toString();
99  }
100 
101  // //////////////////////////////////////////////////////////////////////
102  void Result::fromStream (std::istream& ioIn) {
103  }
104 
105  // //////////////////////////////////////////////////////////////////////
107  getDocumentPair (const Xapian::docid& iDocID) const {
108  // Retrieve the Xapian document and associated ScoreBoard structure
109  // corresponding to the doc ID of the best matching document
110  DocumentMap_T::const_iterator itDoc = _documentMap.find (iDocID);
111 
112  if (itDoc == _documentMap.end()) {
113  OPENTREP_LOG_ERROR ("The Xapian document (ID = " << iDocID
114  << ") can not be found in the Result object "
115  << describeKey());
116  }
117  assert (itDoc != _documentMap.end());
118 
119  //
120  const XapianDocumentPair_T& oDocumentPair = itDoc->second;
121 
122  //
123  return oDocumentPair;
124  }
125 
126  // //////////////////////////////////////////////////////////////////////
127  const Xapian::Document& Result::
128  getDocument (const Xapian::docid& iDocID) const {
129  // First, retrieve the pair made of Xapian document and associated
130  // ScoreBoard structure
131  const XapianDocumentPair_T& lDocumentPair = getDocumentPair (iDocID);
132 
133  // Then, take the Xapian document (and leave the ScoreBoard out)
134  const Xapian::Document& oXapianDocument = lDocumentPair.first;
135 
136  //
137  return oXapianDocument;
138  }
139 
140  // //////////////////////////////////////////////////////////////////////
141  void Result::addDocument (const Xapian::Document& iDocument,
142  const Score_T& iScore) {
151  Score_T lCorrectedScore = iScore;
152  if (_editDistance > 0) {
153  lCorrectedScore = iScore / (_editDistance * _editDistance * _editDistance);
154  }
155 
156  // The document is created at the time of (Xapian-based) full-text matching
157  const ScoreType lXapianScoreType (ScoreType::XAPIAN_PCT);
158 
159  // Create a ScoreBoard structure
160  const ScoreBoard lScoreBoard (_queryString,
161  lXapianScoreType, lCorrectedScore);
162 
163  // Retrieve the ID of the Xapian document
164  const Xapian::docid& lDocID = iDocument.get_docid();
165 
175  // Create a (Xapian document, score board) pair, so as to store
176  // the document along with its corresponding score board
177  const XapianDocumentPair_T lDocumentPair (iDocument, lScoreBoard);
178 
179  // Insert the just created pair into the dedicated (STL) list
180  _documentList.push_back (lDocumentPair);
181 
182  // Insert the just created pair into the dedicated (STL) map
183  const bool hasInsertBeenSuccessful =
184  _documentMap.insert (DocumentMap_T::value_type (lDocID,
185  lDocumentPair)).second;
186  // Sanity check
187  if (hasInsertBeenSuccessful == false) {
188  std::ostringstream errorStr;
189  errorStr << "Error while inserting the Xapian Document pair into "
190  << "the internal STL map";
191  OPENTREP_LOG_DEBUG (errorStr.str());
192  }
193  assert (hasInsertBeenSuccessful == true);
194  }
195 
196  // //////////////////////////////////////////////////////////////////////
197  void Result::fillResult (const Xapian::MSet& iMatchingSet) {
202  for (Xapian::MSetIterator itDoc = iMatchingSet.begin();
203  itDoc != iMatchingSet.end(); ++itDoc) {
204  const int& lXapianPercentage = itDoc.get_percent();
205  const Xapian::Document& lDocument = itDoc.get_document();
206  addDocument (lDocument, lXapianPercentage);
207  }
208  }
209 
210  // //////////////////////////////////////////////////////////////////////
211  void Result::fillPlace (Place& ioPlace) const {
212  // Set the original and corrected/suggested keywords
213  ioPlace.setOriginalKeywords (_queryString);
214  ioPlace.setCorrectedKeywords (_correctedQueryString);
215 
216  // Set the effective (Levenshtein) edit distance/error, as
217  // well as the allowable edit distance/error
218  ioPlace.setEditDistance (_editDistance);
219  ioPlace.setAllowableEditDistance (_allowableEditDistance);
220 
221  // Set the Xapian document ID
222  ioPlace.setDocID (_bestDocID);
223 
224  // Set the matching percentage
225  ioPlace.setPercentage (_bestCombinedWeight);
226 
227  // Retrieve the parameters of the best matching document
228  const LocationKey& lKey = ioPlace.getKey();
229 
230  // DEBUG
231  OPENTREP_LOG_DEBUG ("Place key: " << lKey << " - Xapian ID " << _bestDocID
232  << ", " << _bestCombinedWeight << "% [" << _bestDocData
233  << "]");
234  }
235 
247  // //////////////////////////////////////////////////////////////////////
248  static unsigned int calculateEditDistance (const TravelQuery_T& iPhrase) {
249  NbOfErrors_T oEditDistance = 2;
250 
251  const NbOfErrors_T lQueryStringSize = iPhrase.size();
252 
253  oEditDistance = lQueryStringSize / K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT;
254  return oEditDistance;
255  }
256 
257  // //////////////////////////////////////////////////////////////////////
259  // Initialise the POR (point of reference) parser
260  PORStringParser lStringParser (iRawDataString);
261 
262  // Parse the raw data
263  const Location& oLocation = lStringParser.generateLocation();
264 
265  // DEBUG
266  //OPENTREP_LOG_DEBUG ("Location: " << oLocation);
267 
268  return oLocation;
269  }
270 
271  // //////////////////////////////////////////////////////////////////////
272  Location Result::retrieveLocation (const Xapian::Document& iDocument) {
273  // Retrieve the Xapian document data
274  const std::string& lDocumentDataStr = iDocument.get_data();
275  const RawDataString_T& lDocumentData = RawDataString_T (lDocumentDataStr);
276 
277  // Parse the POR details and create the corresponding Location structure
278  const Location& oLocation = retrieveLocation (lDocumentData);
279 
280  return oLocation;
281  }
282 
283  // //////////////////////////////////////////////////////////////////////
284  LocationKey Result::getPrimaryKey (const Xapian::Document& iDocument) {
285  // Parse the POR (point of reference) details held by the Xapian document
286  const Location& lLocation = retrieveLocation (iDocument);
287 
288  // Get the key (IATA and ICAO codes, GeonamesID)
289  const LocationKey& oLocationKey = lLocation.getKey();
290 
291  return oLocationKey;
292  }
293 
294  // //////////////////////////////////////////////////////////////////////
295  Score_T Result::getEnvelopeID (const Xapian::Document& iDocument) {
296  // Parse the POR (point of reference) details held by the Xapian document
297  const Location& lLocation = retrieveLocation (iDocument);
298 
299  // Get the envelope ID (it is an integer value in the Location structure)
300  const EnvelopeID_T& lEnvelopeIDInt = lLocation.getEnvelopeID();
301 
302  // Convert the envelope ID value, from an integer to a floating point one
303  const Score_T oEnvelopeID = static_cast<const Score_T> (lEnvelopeIDInt);
304 
305  return oEnvelopeID;
306  }
307 
308  // //////////////////////////////////////////////////////////////////////
309  PageRank_T Result::getPageRank (const Xapian::Document& iDocument) {
310  // Parse the POR (point of reference) details held by the Xapian document
311  const Location& lLocation = retrieveLocation (iDocument);
312 
313  // Get the PageRank value
314  const PageRank_T& oPageRank = lLocation.getPageRank();
315 
316  return oPageRank;
317  }
318 
319  // //////////////////////////////////////////////////////////////////////
320  std::string Result::fullTextMatch (const Xapian::Database& iDatabase,
321  const TravelQuery_T& iQueryString,
322  Xapian::MSet& ioMatchingSet) {
323  std::string oMatchedString;
324 
325  // Catch any Xapian::Error exceptions thrown
326  try {
327 
328  // Build the query object
329  Xapian::QueryParser lQueryParser;
330  lQueryParser.set_database (iDatabase);
331 
337  // lQueryParser.set_default_op (Xapian::Query::OP_ADJ);
338  lQueryParser.set_default_op (Xapian::Query::OP_PHRASE);
339 
340  // DEBUG
341  /*
342  OPENTREP_LOG_DEBUG ("Query parser `" << lQueryParser.get_description()
343  << "'");
344  */
345 
346  // DEBUG
347  OPENTREP_LOG_DEBUG (" --------");
348 
349  // Start an enquire session
350  Xapian::Enquire enquire (iDatabase);
351 
358  const Xapian::Query& lXapianQuery =
359  lQueryParser.parse_query (iQueryString,
360  Xapian::QueryParser::FLAG_BOOLEAN
361  | Xapian::QueryParser::FLAG_PHRASE
362  | Xapian::QueryParser::FLAG_LOVEHATE);
363 
364  // Give the query object to the enquire session
365  enquire.set_query (lXapianQuery);
366 
367  // Get the top K_DEFAULT_XAPIAN_MATCHING_SET_SIZE (normally, 30)
368  // results of the query
369  ioMatchingSet = enquire.get_mset (0, K_DEFAULT_XAPIAN_MATCHING_SET_SIZE);
370 
371  // Display the results
372  int nbMatches = ioMatchingSet.size();
373 
374  // DEBUG
375  OPENTREP_LOG_DEBUG (" Query string: `" << iQueryString
376  << "', i.e.: `" << lXapianQuery.get_description()
377  << "' => " << nbMatches << " result(s) found");
378 
379  if (nbMatches != 0) {
380  // Store the effective (Levenshtein) edit distance/error
381  const NbOfErrors_T lEditDistance = 0;
382  setEditDistance (lEditDistance);
383 
384  // Store the allowable edit distance/error
385  setAllowableEditDistance (lEditDistance);
386 
387  //
388  oMatchedString = iQueryString;
389 
390  // Store the fact that there has been a full-text match
391  setHasFullTextMatched (true);
392 
393  // Store the corrected string (the same as the given string, here,
394  // as that latter directly gave full-text matches).
395  setCorrectedQueryString (oMatchedString);
396 
397  // DEBUG
398  OPENTREP_LOG_DEBUG (" Query string: `" << iQueryString
399  << "' provides " << nbMatches << " exact matches.");
400 
401  return oMatchedString;
402  }
403  assert (ioMatchingSet.empty() == true);
404 
410  const NbOfErrors_T& lAllowableEditDistance =
411  calculateEditDistance (iQueryString);
412 
413  // Let Xapian find a spelling correction (if any)
414  const std::string& lCorrectedString =
415  iDatabase.get_spelling_suggestion (iQueryString, lAllowableEditDistance);
416 
417  // If the correction is no better than the original string, there is
418  // no need to go further: there is no match.
419  if (lCorrectedString.empty() == true || lCorrectedString == iQueryString) {
420  // DEBUG
421  OPENTREP_LOG_DEBUG (" Query string: `"
422  << iQueryString << "' provides no match, "
423  << "and there is no spelling suggestion, "
424  << "even with an edit distance of "
425  << lAllowableEditDistance);
426 
427  // Store the fact that there has not been any full-text match
428  setHasFullTextMatched (false);
429 
430  // Leave the string empty
431  return oMatchedString;
432  }
433  assert (lCorrectedString.empty() == false
434  && lCorrectedString != iQueryString);
435 
436  // Calculate the effective (Levenshtein) edit distance/error
437  const NbOfErrors_T& lEditDistance =
438  Levenshtein::getDistance (iQueryString, lCorrectedString);
439 
447  const Xapian::Query& lCorrectedXapianQuery =
448  lQueryParser.parse_query (lCorrectedString,
449  Xapian::QueryParser::FLAG_BOOLEAN
450  | Xapian::QueryParser::FLAG_PHRASE
451  | Xapian::QueryParser::FLAG_LOVEHATE);
452 
453  // Retrieve a maximum of K_DEFAULT_XAPIAN_MATCHING_SET_SIZE (normally,
454  // 30) entries
455  enquire.set_query (lCorrectedXapianQuery);
456  ioMatchingSet = enquire.get_mset (0, K_DEFAULT_XAPIAN_MATCHING_SET_SIZE);
457 
458  // Display the results
459  nbMatches = ioMatchingSet.size();
460 
461  // DEBUG
462  OPENTREP_LOG_DEBUG (" Corrected query string: `" << lCorrectedString
463  << "', i.e.: `"
464  << lCorrectedXapianQuery.get_description()
465  << "' => " << nbMatches << " result(s) found");
466 
467  if (nbMatches != 0) {
468  // Store the effective (Levenshtein) edit distance/error
469  setEditDistance (lEditDistance);
470 
471  // Store the allowable edit distance/error
472  setAllowableEditDistance (lAllowableEditDistance);
473 
474  //
475  oMatchedString = lCorrectedString;
476 
477  // Store the fact that there has been a full-text match
478  setHasFullTextMatched (true);
479 
480  // Store the corrected string
481  setCorrectedQueryString (oMatchedString);
482 
483  // DEBUG
484  OPENTREP_LOG_DEBUG (" Query string: `"
485  << iQueryString << "', spelling suggestion: `"
486  << lCorrectedString
487  << "', with a Levenshtein edit distance of "
488  << lEditDistance
489  << " over an allowable edit distance of "
490  << lAllowableEditDistance << ", provides "
491  << nbMatches << " matches.");
492 
493  //
494  return oMatchedString;
495  }
496 
497  // Error
498  OPENTREP_LOG_ERROR (" Query string: `"
499  << iQueryString << "', spelling suggestion: `"
500  << lCorrectedString
501  << "', with a Levenshtein edit distance of "
502  << lEditDistance
503  << " over an allowable edit distance of "
504  << lAllowableEditDistance << ", provides no match, "
505  << "which is not consistent with the existence of "
506  << "the spelling correction.");
507  assert (false);
508 
509  } catch (const Xapian::Error& error) {
510  OPENTREP_LOG_ERROR ("Exception: " << error.get_msg());
511  throw XapianException (error.get_msg());
512  }
513 
514  // Store the fact that there has not been any full-text match
515  setHasFullTextMatched (false);
516 
517  return oMatchedString;
518  }
519 
520  // //////////////////////////////////////////////////////////////////////
521  std::string Result::fullTextMatch (const Xapian::Database& iDatabase,
522  const TravelQuery_T& iQueryString) {
523  std::string oMatchedString;
524 
525  // Catch any Xapian::Error exceptions thrown
526  try {
527 
528  // DEBUG
529  OPENTREP_LOG_DEBUG (" ----------------");
530  OPENTREP_LOG_DEBUG (" Current query string: '"<< iQueryString << "'");
531 
532  // Check whether the string should be filtered out
533  const bool isToBeAdded = Filter::shouldKeep ("", iQueryString);
534  //const bool isToBeAdded = true;
535 
536  Xapian::MSet lMatchingSet;
537  if (isToBeAdded == true) {
538  oMatchedString = fullTextMatch (iDatabase, iQueryString, lMatchingSet);
539  }
540 
541  // Create the corresponding documents (from the Xapian MSet object)
542  fillResult (lMatchingSet);
543 
544  // DEBUG
545  if (isToBeAdded == false) {
546  OPENTREP_LOG_DEBUG (" No full text search performed as '"
547  << iQueryString
548  << "' is not made of searchable words");
549  }
550  OPENTREP_LOG_DEBUG (" ==> " << toString());
551  OPENTREP_LOG_DEBUG (" ----------------");
552 
553  } catch (const Xapian::Error& error) {
554  OPENTREP_LOG_ERROR ("Xapian-related error: " << error.get_msg());
555  throw XapianException (error.get_msg());
556  }
557 
558  return oMatchedString;
559  }
560 
561  // //////////////////////////////////////////////////////////////////////
563  // Browse the list of Xapian documents
564  for (DocumentList_T::const_iterator itDoc = _documentList.begin();
565  itDoc != _documentList.end(); ++itDoc) {
566  const XapianDocumentPair_T& lDocumentPair = *itDoc;
567 
568  // Retrieve the Xapian document
569  const Xapian::Document& lXapianDoc = lDocumentPair.first;
570 
571  // Extract the Xapian document ID
572  const Xapian::docid& lDocID = lXapianDoc.get_docid();
573 
574  // Extract the envelope ID from the document data
575  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
576 
577  // Retrieve the score board for that Xapian document
578  const ScoreBoard& lScoreBoard = lDocumentPair.second;
579 
580  // Extract the Xapian matching percentage
581  const Score_T& lXapianPct = lScoreBoard.getScore (ScoreType::XAPIAN_PCT);
582 
583  // DEBUG
584  OPENTREP_LOG_NOTIFICATION (" [xapian] '" << describeShortKey()
585  << "' with (" << lLocationKey << ", doc ID = "
586  << lDocID << ") matches at " << lXapianPct
587  << "%");
588  }
589  }
590 
591  // //////////////////////////////////////////////////////////////////////
592  void Result::setScoreOnDocMap (const Xapian::docid& iDocID,
593  const ScoreType& iType, const Score_T& iScore) {
594  // Retrieve the Xapian document and associated ScoreBoard structure
595  // corresponding to the given doc ID
596  DocumentMap_T::iterator itDoc = _documentMap.find (iDocID);
597 
598  if (itDoc == _documentMap.end()) {
599  OPENTREP_LOG_ERROR ("The Xapian document (ID = " << iDocID
600  << ") can not be found in the Result object "
601  << describeKey());
602  }
603  assert (itDoc != _documentMap.end());
604 
605  // Retrieve the associated ScoreBoard structure
606  XapianDocumentPair_T& lXapianDocPair = itDoc->second;
607  ScoreBoard& lScoreBoard = lXapianDocPair.second;
608 
609  // Update the score/weight
610  lScoreBoard.setScore (iType, iScore);
611  }
612 
613  // //////////////////////////////////////////////////////////////////////
615  // Browse the list of Xapian documents
616  for (DocumentList_T::iterator itDoc = _documentList.begin();
617  itDoc != _documentList.end(); ++itDoc) {
618  XapianDocumentPair_T& lDocumentPair = *itDoc;
619 
620  // Retrieve the Xapian document
621  const Xapian::Document& lXapianDoc = lDocumentPair.first;
622 
623  // Extract the Xapian document ID
624  const Xapian::docid& lDocID = lXapianDoc.get_docid();
625 
626 // Extract the primary (location) key from the document data
627  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
628 
629  // Extract the envelope ID from the document data
630  const EnvelopeID_T& lEnvelopeIDInt = getEnvelopeID (lXapianDoc);
631 
632  // DEBUG
633  if (lEnvelopeIDInt != 0) {
635  << "] (" << lLocationKey << ", doc ID = "
636  << lDocID << ") has a non-null envelope ID ("
637  << lEnvelopeIDInt << ") => match of 0.10%");
638  }
639 
640  // Convert the envelope ID value, from an integer to a floating point one
641  const Score_T lEnvelopeID = static_cast<const Score_T> (lEnvelopeIDInt);
642 
643  // Retrieve the score board for that Xapian document
644  ScoreBoard& lScoreBoard = lDocumentPair.second;
645 
646  // Store the envelope-related weight
647  lScoreBoard.setScore (ScoreType::ENV_ID, lEnvelopeID);
648  setScoreOnDocMap (lDocID, ScoreType::ENV_ID, lEnvelopeID);
649  }
650  }
651 
652  // //////////////////////////////////////////////////////////////////////
654  // Browse the list of Xapian documents
655  for (DocumentList_T::iterator itDoc = _documentList.begin();
656  itDoc != _documentList.end(); ++itDoc) {
657  XapianDocumentPair_T& lDocumentPair = *itDoc;
658 
659  // Retrieve the Xapian document
660  const Xapian::Document& lXapianDoc = lDocumentPair.first;
661 
662  // Extract the Xapian document ID
663  const Xapian::docid& lDocID = lXapianDoc.get_docid();
664 
665 // Extract the primary (location) key from the document data
666  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
667 
668  // Initialisation of the IATA/ICAO code full matching percentage
669  Score_T lCodeMatchPct = 0.0;
670  bool hasCodeFullyMatched = false;
671 
672  // Filter out "standard" words such as "airport", "international",
673  // "city", as well as words having a length strictly less than
674  // 3 letters.
675  std::string lFilteredString (_queryString);
676  const NbOfLetters_T kMinWordLength = 3;
677  Filter::trim (lFilteredString, kMinWordLength);
678 
679  // Check whether or not the filtered query string is made of
680  // a single word
681  WordList_T lFilteredQueryWordList;
683  lFilteredQueryWordList);
684  const NbOfWords_T nbOfFilteredQueryWords = lFilteredQueryWordList.size();
685 
686  //
687  if (_hasFullTextMatched == true) {
694  const size_t lNbOfLetters = lFilteredString.size();
695  if (nbOfFilteredQueryWords == 1
696  && lNbOfLetters >= 3 && lNbOfLetters <= 4
697  && _correctedQueryString == _queryString) {
698  // Convert the query string (made of one word of 3 or 4 letters)
699  // to uppercase letters
700  std::string lUpperQueryWord;
701  lUpperQueryWord.resize (lNbOfLetters);
702  std::transform (lFilteredString.begin(), lFilteredString.end(),
703  lUpperQueryWord.begin(), ::toupper);
704 
705 // Retrieve the IATA code
706  const IATACode_T& lIataCode = lLocationKey.getIataCode();
707 
708  // Compare the 3/4-letter-word query string with the IATA
709  // and ICAO codes
710  if (lUpperQueryWord == lIataCode) {
714  lCodeMatchPct = 1.0;
715  hasCodeFullyMatched = true;
716  }
717  }
718 
719  if (hasCodeFullyMatched == true) {
720  // DEBUG
722  << "' matches the IATA/ICAO code ("
723  << lLocationKey << ", doc ID = "
724  << lDocID << ") => match of "
726  } else {
727  // DEBUG
729  << "' does not match with the IATA/ICAO "
730  << "code (" << lLocationKey << ", doc ID = "
731  << lDocID << ") => match of "
733  }
734  }
735 
736  // Retrieve the score board for that Xapian document
737  ScoreBoard& lScoreBoard = lDocumentPair.second;
738 
739  // Store the IATA/ICAO code match percentage/weight
740  lScoreBoard.setScore (ScoreType::CODE_FULL_MATCH, lCodeMatchPct);
741  setScoreOnDocMap (lDocID, ScoreType::CODE_FULL_MATCH, lCodeMatchPct);
742  }
743  }
744 
745  // //////////////////////////////////////////////////////////////////////
747  // Browse the list of Xapian documents
748  for (DocumentList_T::iterator itDoc = _documentList.begin();
749  itDoc != _documentList.end(); ++itDoc) {
750  XapianDocumentPair_T& lDocumentPair = *itDoc;
751 
752  // Retrieve the Xapian document
753  const Xapian::Document& lXapianDoc = lDocumentPair.first;
754 
755  // Extract the Xapian document ID
756  const Xapian::docid& lDocID = lXapianDoc.get_docid();
757 
758 // Extract the primary (location) key from the document data
759  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
760 
761  // Extract the PageRank from the document data
762  const Score_T& lPageRank = getPageRank (lXapianDoc);
763 
764  // DEBUG
766  << "] (" << lLocationKey << ", doc ID = "
767  << lDocID << ") has a PageRank of "
768  << lPageRank << "%");
769 
770  // Retrieve the score board for that Xapian document
771  ScoreBoard& lScoreBoard = lDocumentPair.second;
772 
773  // Store the PageRank weight
774  lScoreBoard.setScore (ScoreType::PAGE_RANK, lPageRank);
775  setScoreOnDocMap (lDocID, ScoreType::PAGE_RANK, lPageRank);
776  }
777  }
778 
779  // //////////////////////////////////////////////////////////////////////
786  }
787 
788  // //////////////////////////////////////////////////////////////////////
790  Percentage_T lMaxPercentage = 0.0;
791  std::string lBestDocData;
792 
793  // Browse the list of Xapian documents
794  Xapian::docid lBestDocID = 0;
795  for (DocumentList_T::iterator itDoc = _documentList.begin();
796  itDoc != _documentList.end(); ++itDoc) {
797  XapianDocumentPair_T& lDocumentPair = *itDoc;
798 
799  // Retrieve the Xapian document ID
800  const Xapian::Document& lXapianDoc = lDocumentPair.first;
801  const Xapian::docid& lDocID = lXapianDoc.get_docid();
802  const std::string& lDocData = lXapianDoc.get_data();
803 
808  ScoreBoard& lScoreBoard = lDocumentPair.second;
809  const Percentage_T& lPercentage = lScoreBoard.calculateCombinedWeight();
810 
820  // Register the document, if it is the best matching until now
821  if (lPercentage > lMaxPercentage) {
822  lMaxPercentage = lPercentage;
823  lBestDocID = lDocID;
824  lBestDocData = lDocData;
825  }
826  }
827 
828  // Check whether or not the (original) query string is made of a single word
829  WordList_T lOriginalQueryWordList;
831  lOriginalQueryWordList);
832  const NbOfWords_T nbOfOriginalQueryWords = lOriginalQueryWordList.size();
833 
834  //
835  if (_hasFullTextMatched == true) {
836  // Retrieve the primary key (IATA, location type, Geonames ID) of
837  // the place corresponding to the document
838  const XapianDocumentPair_T& lXapianDocPair = getDocumentPair (lBestDocID);
839  const Xapian::Document& lXapianDoc = lXapianDocPair.first;
840  const ScoreBoard& lScoreBoard = lXapianDocPair.second;
841  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
842 
843  // DEBUG
844  OPENTREP_LOG_DEBUG (" [pct] '" << describeShortKey()
845  << "' matches at " << lMaxPercentage
846  << "% for " << lLocationKey << " (doc ID = "
847  << lBestDocID << "). Score calculation: "
848  << lScoreBoard.describe());
849 
850  } else {
855  const bool shouldBeKept = Filter::shouldKeep ("", _queryString);
856 
857  if (nbOfOriginalQueryWords == 1 && shouldBeKept == true) {
863  lMaxPercentage = 100.0;
864 
865  // DEBUG
866  OPENTREP_LOG_DEBUG (" [pct] '" << describeShortKey()
867  << "' does not match, but it is a non black-listed "
868  << "single-word string; hence, the weight is "
869  << lMaxPercentage << "%");
870 
871  } else {
880  lMaxPercentage = std::pow (10.0, -3*nbOfOriginalQueryWords);
881 
882  // DEBUG
883  OPENTREP_LOG_DEBUG(" [pct] '" << describeShortKey()
884  << "' does not match, and is either a multiple-word "
885  << "string or black-listed; hence, the weight is "
886  << lMaxPercentage << "%");
887  }
888  }
889 
890  // Store the doc ID of the best matching document
891  setBestDocID (lBestDocID);
892 
893  // Store the best weight
894  setBestCombinedWeight (lMaxPercentage);
895 
896  // Store all the details of the Xapian document
897  setBestDocData (lBestDocData);
898  }
899 
900 }
void fromStream(std::istream &ioIn)
Definition: Result.cpp:102
void setEditDistance(const NbOfErrors_T &iEditDistance)
Definition: Place.hpp:898
void setCorrectedQueryString(const TravelQuery_T &iCorrectedQueryString)
Definition: Result.hpp:156
Class modelling the primary key of a location/POR (point of reference).
Definition: LocationKey.hpp:29
Structure holding a board for all the types of score/matching having been performed.
Definition: ScoreBoard.hpp:22
const NbOfErrors_T K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT
std::pair< Xapian::Document, ScoreBoard > XapianDocumentPair_T
Definition: Result.hpp:23
unsigned int EnvelopeID_T
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition: Logger.hpp:24
#define OPENTREP_LOG_DEBUG(iToBeLogged)
Definition: Logger.hpp:33
#define OPENTREP_LOG_NOTIFICATION(iToBeLogged)
Definition: Logger.hpp:27
double Percentage_T
static int getDistance(const std::string &iSource, const std::string &iTarget)
Definition: Levenshtein.cpp:13
double Score_T
void calculateCombinedWeights()
Definition: Result.cpp:789
static LocationKey getPrimaryKey(const Xapian::Document &)
Definition: Result.cpp:284
Structure modelling a (geographical) location.
Definition: Location.hpp:25
static Score_T getEnvelopeID(const Xapian::Document &)
Definition: Result.cpp:295
double PageRank_T
static PageRank_T getPageRank(const Xapian::Document &)
Definition: Result.cpp:309
unsigned int NbOfLetters_T
std::string describe() const
Definition: ScoreBoard.cpp:157
const XapianDocumentPair_T & getDocumentPair(const Xapian::docid &) const
Definition: Result.cpp:107
void setCorrectedKeywords(const std::string &iCorrectedKeywords)
Definition: Place.hpp:884
std::string toString() const
Definition: Result.cpp:65
static Location retrieveLocation(const Xapian::Document &)
Definition: Result.cpp:272
void addDocument(const Xapian::Document &, const Score_T &)
Definition: Result.cpp:141
std::string fullTextMatch(const Xapian::Database &, const TravelQuery_T &)
Definition: Result.cpp:521
static void trim(std::string &ioPhrase, const NbOfLetters_T &iMinWordLength=4)
Definition: Filter.cpp:131
const EnvelopeID_T & getEnvelopeID() const
Definition: Location.hpp:123
std::list< Word_T > WordList_T
void setOriginalKeywords(const std::string &iOriginalKeywords)
Definition: Place.hpp:877
void setScoreOnDocMap(const Xapian::docid &, const ScoreType &, const Score_T &)
Definition: Result.cpp:592
const PageRank_T & getPageRank() const
Definition: Location.hpp:354
void fillResult(const Xapian::MSet &iMatchingSet)
Definition: Result.cpp:197
Percentage_T calculateCombinedWeight()
Definition: ScoreBoard.cpp:186
static void tokeniseStringIntoWordList(const TravelQuery_T &, WordList_T &)
Definition: WordHolder.cpp:37
void setEditDistance(const NbOfErrors_T &iEditDistance)
Definition: Result.hpp:170
void setAllowableEditDistance(const NbOfErrors_T &iAllowableEditDistance)
Definition: Place.hpp:906
const IATACode_T & getIataCode() const
Definition: LocationKey.hpp:35
unsigned short NbOfErrors_T
const Percentage_T K_DEFAULT_MODIFIED_MATCHING_PCT
void setBestCombinedWeight(const Percentage_T &iPercentage)
Definition: Result.hpp:215
Class modelling a place/POR (point of reference).
Definition: Place.hpp:29
static bool shouldKeep(const std::string &iPhrase, const std::string &iWord)
Definition: Filter.cpp:144
const Xapian::Document & getDocument(const Xapian::docid &) const
Definition: Result.cpp:128
void setBestDocData(const std::string &iDocData)
Definition: Result.hpp:222
const Percentage_T K_DEFAULT_FULL_CODE_MATCH_PCT
const LocationKey & getKey() const
Definition: Place.hpp:59
const NbOfMatches_T K_DEFAULT_XAPIAN_MATCHING_SET_SIZE
unsigned short NbOfWords_T
Enumeration of score types.
Definition: ScoreType.hpp:25
void calculateHeuristicWeights()
Definition: Result.cpp:780
std::string describeKey() const
Definition: Result.cpp:49
void fillPlace(Place &) const
Definition: Result.cpp:211
std::string describeShortKey() const
Definition: Result.cpp:42
void setDocID(const XapianDocID_T &iDocID)
Definition: Place.hpp:920
void calculatePageRanks()
Definition: Result.cpp:746
Score_T getScore(const ScoreType &) const
Definition: ScoreBoard.cpp:40
void setScore(const ScoreType &, const Score_T &)
Definition: ScoreBoard.cpp:54
const LocationKey & getKey() const
Definition: Location.hpp:31
void toStream(std::ostream &ioOut) const
Definition: Result.cpp:97
void setBestDocID(const Xapian::docid &iDocID)
Definition: Result.hpp:207
std::string TravelQuery_T
void displayXapianPercentages() const
Definition: Result.cpp:562
void setHasFullTextMatched(const bool iHasFullTextMatched)
Definition: Result.hpp:163
void setPercentage(const MatchingPercentage_T &iPercentage)
Definition: Place.hpp:891
const Location & generateLocation()
void setAllowableEditDistance(const NbOfErrors_T &iAllowableEditDistance)
Definition: Result.hpp:178
void calculateEnvelopeWeights()
Definition: Result.cpp:614
void calculateCodeMatches()
Definition: Result.cpp:653
static unsigned int calculateEditDistance(const TravelQuery_T &iPhrase)
Helper function.