Tuesday, July 17, 2012

XQuery: Binary search on sorted collection

Big problem with XQuery is that - unlike SQL - it doesn't allow you to specify indexes on a collection of elements. All searches are "full table scan". What a pain. Performance becomes ridiculous when the elements of a collection are numerous.

An improvement can be obtained by sorting the collection with a "order by PK" (PK being the field on which you plan to perform your search), and using a recursive binary search on the collection:

xquery version "1.0" encoding "Cp1252";

declare namespace xf = "http://tempuri.org/FindPrice/";


declare function xf:FindPriceBinarySearch($loopDepth as xs:integer, $intProdId as xs:string, $prices as element()*, $startIndex as xs:integer, $endIndex as xs:integer)
    as element() {
        let $middleIndex := xs:integer(  xs:integer(($endIndex - $startIndex) div 2) + xs:integer($startIndex)) 
        let $middleElement := $prices[$middleIndex]
        let $middleProductId := $middleElement/product_id/text()
        return
        if ($loopDepth < 15 and $middleProductId < $intProdId) then
         xf:FindPriceBinarySearch($loopDepth + 1, $intProdId, $prices, $middleIndex, $endIndex)
        else if ($loopDepth < 15 and $middleProductId > $intProdId) then
           xf:FindPriceBinarySearch($loopDepth+ 1, $intProdId, $prices, $startIndex, $middleIndex)
        else
         $middleElement
          
};

declare function xf:FindPrice($intProdId as xs:string, $prices as element())
    as element() {
        xf:FindPriceBinarySearch(0, $intProdId, $prices//price, 1, count($prices//price))
};

declare variable $product_id as xs:string external;
declare variable $prices as element() external;

xf:FindPrice($product_id, $prices)


this xquery will ALMOST work (I know it doesn't find elements at the end of the collection :o( ), the input being:


<prices>
<price>
<product_id>AAA</product_id>
</price>
<price>
<product_id>BBB</product_id>
</price>
</prices>

No comments: