ref: 653e6856ea1cfc60cc16733807d23b302dbe4bd5
parent: f4f566edf4bd6a590cf9cdbd5cfc0026ecd93b14
author: Bjørn Erik Pedersen <[email protected]>
date: Fri Oct 11 09:55:46 EDT 2019
resources/page: Use binary search in Pages.Prev/Next if possible This is obviously much faster for lager data sets: ```bash name old time/op new time/op delta SearchPage/ByWeight-100-4 267ns ± 4% 272ns ± 5% ~ (p=0.457 n=4+4) SearchPage/ByWeight-5000-4 10.8µs ± 3% 1.2µs ± 2% -88.99% (p=0.029 n=4+4) SearchPage/ByWeight-10000-4 21.1µs ± 1% 1.4µs ±11% -93.28% (p=0.029 n=4+4) ``` See #4500
--- a/hugolib/pages_test.go
+++ b/hugolib/pages_test.go
@@ -63,7 +63,7 @@
Variant{"Pages.Shuffled.Prev", shufflePages, func(p page.Page, pages page.Pages) { pages.Prev(p) }},
Variant{"Pages.ByTitle.Next", func(pages page.Pages) page.Pages { return pages.ByTitle() }, func(p page.Page, pages page.Pages) { pages.Next(p) }},
} {
- for _, numPages := range []int{100, 300, 900, 5000} {
+ for _, numPages := range []int{300, 5000} {
b.Run(fmt.Sprintf("%s-pages-%d", variant.name, numPages), func(b *testing.B) {
b.StopTimer()
builder := newPagesPrevNextTestSite(b, numPages)
--- a/resources/page/pages_prev_next.go
+++ b/resources/page/pages_prev_next.go
@@ -15,26 +15,21 @@
// Next returns the next page reletive to the given
func (p Pages) Next(cur Page) Page {
- for x, c := range p {
- if c.Eq(cur) {
- if x == 0 {
- return nil
- }
- return p[x-1]
- }
+ x := searchPage(cur, p)
+ if x <= 0 {
+ return nil
}
- return nil
+ return p[x-1]
}
// Prev returns the previous page reletive to the given
func (p Pages) Prev(cur Page) Page {
- for x, c := range p {
- if c.Eq(cur) {
- if x < len(p)-1 {
- return p[x+1]
- }
- return nil
- }
+ x := searchPage(cur, p)
+
+ if x == -1 || len(p)-x < 2 {
+ return nil
}
- return nil
+
+ return p[x+1]
+
}
--- a/resources/page/pages_sort.go
+++ b/resources/page/pages_sort.go
@@ -46,60 +46,79 @@
sort.Stable(ps)
}
-// DefaultPageSort is the default sort func for pages in Hugo:
-// Order by Weight, Date, LinkTitle and then full file path.
-var DefaultPageSort = func(p1, p2 Page) bool {
- if p1.Weight() == p2.Weight() {
- if p1.Date().Unix() == p2.Date().Unix() {
- c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
- if c == 0 {
- if p1.File().IsZero() || p2.File().IsZero() {
- return p1.File().IsZero()
+var (
+
+ // DefaultPageSort is the default sort func for pages in Hugo:
+ // Order by Weight, Date, LinkTitle and then full file path.
+ DefaultPageSort = func(p1, p2 Page) bool {
+ if p1.Weight() == p2.Weight() {
+ if p1.Date().Unix() == p2.Date().Unix() {
+ c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
+ if c == 0 {
+ if p1.File().IsZero() || p2.File().IsZero() {
+ return p1.File().IsZero()
+ }
+ return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
}
- return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
+ return c < 0
}
- return c < 0
+ return p1.Date().Unix() > p2.Date().Unix()
}
- return p1.Date().Unix() > p2.Date().Unix()
- }
- if p2.Weight() == 0 {
- return true
- }
+ if p2.Weight() == 0 {
+ return true
+ }
- if p1.Weight() == 0 {
- return false
+ if p1.Weight() == 0 {
+ return false
+ }
+
+ return p1.Weight() < p2.Weight()
}
- return p1.Weight() < p2.Weight()
-}
+ lessPageLanguage = func(p1, p2 Page) bool {
-var languagePageSort = func(p1, p2 Page) bool {
-
- if p1.Language().Weight == p2.Language().Weight {
- if p1.Date().Unix() == p2.Date().Unix() {
- c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
- if c == 0 {
- if !p1.File().IsZero() && !p2.File().IsZero() {
- return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
+ if p1.Language().Weight == p2.Language().Weight {
+ if p1.Date().Unix() == p2.Date().Unix() {
+ c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
+ if c == 0 {
+ if !p1.File().IsZero() && !p2.File().IsZero() {
+ return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
+ }
}
+ return c < 0
}
- return c < 0
+ return p1.Date().Unix() > p2.Date().Unix()
}
- return p1.Date().Unix() > p2.Date().Unix()
+
+ if p2.Language().Weight == 0 {
+ return true
+ }
+
+ if p1.Language().Weight == 0 {
+ return false
+ }
+
+ return p1.Language().Weight < p2.Language().Weight
}
- if p2.Language().Weight == 0 {
- return true
+ lessPageTitle = func(p1, p2 Page) bool {
+ return compare.LessStrings(p1.Title(), p2.Title())
}
- if p1.Language().Weight == 0 {
- return false
+ lessPageLinkTitle = func(p1, p2 Page) bool {
+ return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle())
}
- return p1.Language().Weight < p2.Language().Weight
-}
+ lessPageDate = func(p1, p2 Page) bool {
+ return p1.Date().Unix() < p2.Date().Unix()
+ }
+ lessPagePubDate = func(p1, p2 Page) bool {
+ return p1.PublishDate().Unix() < p2.PublishDate().Unix()
+ }
+)
+
func (ps *pageSorter) Len() int { return len(ps.pages) }
func (ps *pageSorter) Swap(i, j int) { ps.pages[i], ps.pages[j] = ps.pages[j], ps.pages[i] }
@@ -139,11 +158,7 @@
const key = "pageSort.ByTitle"
- title := func(p1, p2 Page) bool {
- return compare.LessStrings(p1.Title(), p2.Title())
- }
-
- pages, _ := spc.get(key, pageBy(title).Sort, p)
+ pages, _ := spc.get(key, pageBy(lessPageTitle).Sort, p)
return pages
}
@@ -156,12 +171,8 @@
const key = "pageSort.ByLinkTitle"
- linkTitle := func(p1, p2 Page) bool {
- return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle())
- }
+ pages, _ := spc.get(key, pageBy(lessPageLinkTitle).Sort, p)
- pages, _ := spc.get(key, pageBy(linkTitle).Sort, p)
-
return pages
}
@@ -174,12 +185,8 @@
const key = "pageSort.ByDate"
- date := func(p1, p2 Page) bool {
- return p1.Date().Unix() < p2.Date().Unix()
- }
+ pages, _ := spc.get(key, pageBy(lessPageDate).Sort, p)
- pages, _ := spc.get(key, pageBy(date).Sort, p)
-
return pages
}
@@ -192,12 +199,8 @@
const key = "pageSort.ByPublishDate"
- pubDate := func(p1, p2 Page) bool {
- return p1.PublishDate().Unix() < p2.PublishDate().Unix()
- }
+ pages, _ := spc.get(key, pageBy(lessPagePubDate).Sort, p)
- pages, _ := spc.get(key, pageBy(pubDate).Sort, p)
-
return pages
}
@@ -276,7 +279,7 @@
const key = "pageSort.ByLanguage"
- pages, _ := spc.get(key, pageBy(languagePageSort).Sort, p)
+ pages, _ := spc.get(key, pageBy(lessPageLanguage).Sort, p)
return pages
}
@@ -283,7 +286,7 @@
// SortByLanguage sorts the pages by language.
func SortByLanguage(pages Pages) {
- pageBy(languagePageSort).Sort(pages)
+ pageBy(lessPageLanguage).Sort(pages)
}
// Reverse reverses the order in Pages and returns a copy.
--- /dev/null
+++ b/resources/page/pages_sort_search.go
@@ -1,0 +1,126 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package page
+
+import "sort"
+
+// Used in page binary search, the most common in front.
+var pageLessFunctions = []func(p1, p2 Page) bool{
+ DefaultPageSort,
+ lessPageDate,
+ lessPagePubDate,
+ lessPageTitle,
+ lessPageLinkTitle,
+}
+
+func searchPage(p Page, pages Pages) int {
+ if len(pages) < 1000 {
+ // For smaller data sets, doing a linear search is faster.
+ return searchPageLinear(p, pages, 0)
+ }
+
+ less := isPagesProbablySorted(pages, pageLessFunctions...)
+ if less == nil {
+ return searchPageLinear(p, pages, 0)
+ }
+
+ i := searchPageBinary(p, pages, less)
+ if i != -1 {
+ return i
+ }
+
+ return searchPageLinear(p, pages, 0)
+}
+
+func searchPageLinear(p Page, pages Pages, start int) int {
+ for i := start; i < len(pages); i++ {
+ c := pages[i]
+ if c.Eq(p) {
+ return i
+ }
+ }
+ return -1
+}
+
+func searchPageBinary(p Page, pages Pages, less func(p1, p2 Page) bool) int {
+ n := len(pages)
+
+ f := func(i int) bool {
+ c := pages[i]
+ isLess := less(c, p)
+ return !isLess || c.Eq(p)
+ }
+
+ i := sort.Search(n, f)
+
+ if i == n {
+ return -1
+ }
+
+ return searchPageLinear(p, pages, i)
+
+}
+
+// isProbablySorted tests if the pages slice is probably sorted.
+func isPagesProbablySorted(pages Pages, lessFuncs ...func(p1, p2 Page) bool) func(p1, p2 Page) bool {
+ n := len(pages)
+ step := 1
+ if n > 500 {
+ step = 50
+ }
+
+ is := func(less func(p1, p2 Page) bool) bool {
+ samples := 0
+
+ for i := n - 1; i > 0; i = i - step {
+ if less(pages[i], pages[i-1]) {
+ return false
+ }
+ samples++
+ if samples >= 15 {
+ return true
+ }
+ }
+ return samples > 0
+ }
+
+ isReverse := func(less func(p1, p2 Page) bool) bool {
+ samples := 0
+
+ for i := 0; i < n-1; i = i + step {
+ if less(pages[i], pages[i+1]) {
+ return false
+ }
+ samples++
+
+ if samples > 15 {
+ return true
+ }
+ }
+ return samples > 0
+ }
+
+ for _, less := range lessFuncs {
+ if is(less) {
+ return less
+ }
+ if isReverse(less) {
+ return func(p1, p2 Page) bool {
+ return less(p2, p1)
+ }
+ }
+ }
+
+ return nil
+}
--- /dev/null
+++ b/resources/page/pages_sort_search_test.go
@@ -1,0 +1,124 @@
+// Copyright 2019 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package page
+
+import (
+ "fmt"
+ "math/rand"
+ "testing"
+ "time"
+
+ qt "github.com/frankban/quicktest"
+)
+
+func TestSearchPage(t *testing.T) {
+ t.Parallel()
+ c := qt.New(t)
+ pages := createSortTestPages(10)
+ for i, p := range pages {
+ p.(*testPage).title = fmt.Sprintf("Title %d", i%2)
+ }
+
+ for _, pages := range []Pages{pages.ByTitle(), pages.ByTitle().Reverse()} {
+ less := isPagesProbablySorted(pages, lessPageTitle)
+ c.Assert(less, qt.Not(qt.IsNil))
+ for i, p := range pages {
+ idx := searchPageBinary(p, pages, less)
+ c.Assert(idx, qt.Equals, i)
+ }
+ }
+
+}
+
+func BenchmarkSearchPage(b *testing.B) {
+ type Variant struct {
+ name string
+ preparePages func(pages Pages) Pages
+ search func(p Page, pages Pages) int
+ }
+
+ shufflePages := func(pages Pages) Pages {
+ rand.Shuffle(len(pages), func(i, j int) { pages[i], pages[j] = pages[j], pages[i] })
+ return pages
+ }
+
+ linearSearch := func(p Page, pages Pages) int {
+ return searchPageLinear(p, pages, 0)
+ }
+
+ createPages := func(num int) Pages {
+ pages := createSortTestPages(num)
+ for _, p := range pages {
+ tp := p.(*testPage)
+ tp.weight = rand.Intn(len(pages))
+ tp.title = fmt.Sprintf("Title %d", rand.Intn(len(pages)))
+
+ tp.pubDate = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour)
+ tp.date = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour)
+ }
+
+ return pages
+ }
+
+ for _, variant := range []Variant{
+ Variant{"Shuffled", shufflePages, searchPage},
+ Variant{"ByWeight", func(pages Pages) Pages {
+ return pages.ByWeight()
+ }, searchPage},
+ Variant{"ByWeight.Reverse", func(pages Pages) Pages {
+ return pages.ByWeight().Reverse()
+ }, searchPage},
+ Variant{"ByDate", func(pages Pages) Pages {
+ return pages.ByDate()
+ }, searchPage},
+ Variant{"ByPublishDate", func(pages Pages) Pages {
+ return pages.ByPublishDate()
+ }, searchPage},
+ Variant{"ByTitle", func(pages Pages) Pages {
+ return pages.ByTitle()
+ }, searchPage},
+ Variant{"ByTitle Linear", func(pages Pages) Pages {
+ return pages.ByTitle()
+ }, linearSearch},
+ } {
+ for _, numPages := range []int{100, 500, 1000, 5000} {
+ b.Run(fmt.Sprintf("%s-%d", variant.name, numPages), func(b *testing.B) {
+ b.StopTimer()
+ pages := createPages(numPages)
+ if variant.preparePages != nil {
+ pages = variant.preparePages(pages)
+ }
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ j := rand.Intn(numPages)
+ k := variant.search(pages[j], pages)
+ if k != j {
+ b.Fatalf("%d != %d", k, j)
+ }
+ }
+ })
+ }
+ }
+}
+
+func TestIsPagesProbablySorted(t *testing.T) {
+ t.Parallel()
+ c := qt.New(t)
+
+ c.Assert(isPagesProbablySorted(createSortTestPages(6).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil))
+ c.Assert(isPagesProbablySorted(createSortTestPages(300).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil))
+ c.Assert(isPagesProbablySorted(createSortTestPages(6), DefaultPageSort), qt.IsNil)
+ c.Assert(isPagesProbablySorted(createSortTestPages(300).ByTitle(), pageLessFunctions...), qt.Not(qt.IsNil))
+
+}
--- a/resources/page/pages_sort_test.go
+++ b/resources/page/pages_sort_test.go
@@ -269,6 +269,7 @@
for i := 0; i < num; i++ {
p := newTestPage()
p.path = fmt.Sprintf("/x/y/p%d.md", i)
+ p.title = fmt.Sprintf("Title %d", i%(num+1/2))
p.params = map[string]interface{}{
"arbitrarily": map[string]interface{}{
"nested": ("xyz" + fmt.Sprintf("%v", 100-i)),
--- a/resources/page/testhelpers_test.go
+++ b/resources/page/testhelpers_test.go
@@ -310,6 +310,9 @@
func (p *testPage) LinkTitle() string {
if p.linkTitle == "" {
+ if p.title == "" {
+ return p.path
+ }
return p.title
}
return p.linkTitle
--- a/resources/page/weighted.go
+++ b/resources/page/weighted.go
@@ -99,7 +99,7 @@
// this weighted page set.
func (wp WeightedPages) Next(cur Page) Page {
for x, c := range wp {
- if c.Page == cur {
+ if c.Page.Eq(cur) {
if x == 0 {
return nil
}
@@ -113,7 +113,7 @@
// this weighted page set.
func (wp WeightedPages) Prev(cur Page) Page {
for x, c := range wp {
- if c.Page == cur {
+ if c.Page.Eq(cur) {
if x < len(wp)-1 {
return wp[x+1].Page
}