Diacritics substitution in prompt (#7205)

This commit is contained in:
Benjamin Levesque 2023-04-21 16:09:59 +02:00 committed by GitHub
parent 8b2cea1b8f
commit 7cfbf478d5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 169 additions and 0 deletions

View file

@ -7,6 +7,7 @@ import (
"github.com/AlecAivazis/survey/v2"
"github.com/cli/cli/v2/internal/ghinstance"
"github.com/cli/cli/v2/internal/text"
"github.com/cli/cli/v2/pkg/surveyext"
)
@ -49,11 +50,24 @@ type surveyPrompter struct {
stderr io.Writer
}
// LatinMatchingFilter reports whether value matches the filter text.
// Both strings are lower-cased first, so the comparison ignores case.
// Diacritics in the filter are kept exactly as given, while value is tried
// both as-is and with its diacritics removed; a filter that omits a
// diacritic therefore still matches a value that carries one.
// The index argument is not used.
func LatinMatchingFilter(filter, value string, index int) bool {
	needle := strings.ToLower(filter)
	haystack := strings.ToLower(value)

	// Literal substring match comes first.
	if strings.Contains(haystack, needle) {
		return true
	}
	// Otherwise retry against the value with its diacritics stripped.
	return strings.Contains(text.RemoveDiacritics(haystack), needle)
}
func (p *surveyPrompter) Select(message, defaultValue string, options []string) (result int, err error) {
q := &survey.Select{
Message: message,
Options: options,
PageSize: 20,
Filter: LatinMatchingFilter,
}
if defaultValue != "" {
@ -77,6 +91,7 @@ func (p *surveyPrompter) MultiSelect(message, defaultValue string, options []str
Message: message,
Options: options,
PageSize: 20,
Filter: LatinMatchingFilter,
}
if defaultValue != "" {

View file

@ -0,0 +1,78 @@
package prompter
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestFilterDiacritics checks LatinMatchingFilter against filter/value pairs
// with and without diacritics: the filter must match case-insensitively, a
// filter without diacritics must match a value that has them, and a filter
// with diacritics must not match a value that lacks them.
func TestFilterDiacritics(t *testing.T) {
	tests := []struct {
		name   string
		filter string
		value  string
		want   bool
	}{
		{
			name:   "exact match no diacritics",
			filter: "Mikelis",
			value:  "Mikelis",
			want:   true,
		},
		{
			name:   "case-insensitive match no diacritics",
			filter: "mikelis",
			value:  "Mikelis",
			want:   true,
		},
		{
			name:   "exact match diacritics",
			filter: "Miķelis",
			value:  "Miķelis",
			want:   true,
		},
		{
			name:   "partial match diacritics",
			filter: "Miķe",
			value:  "Miķelis",
			want:   true,
		},
		{
			name:   "exact match diacritics in value",
			filter: "Mikelis",
			value:  "Miķelis",
			want:   true,
		},
		{
			name:   "partial match diacritics in value",
			filter: "Mike",
			value:  "Miķelis",
			want:   true,
		},
		{
			name:   "no match when removing diacritics in filter",
			filter: "Mielis",
			value:  "Mikelis",
			want:   false,
		},
		{
			name:   "no match when removing diacritics in value",
			filter: "Mikelis",
			value:  "Mielis",
			want:   false,
		},
		{
			name:   "no match diacritics in filter",
			filter: "Miķelis",
			value:  "Mikelis",
			want:   false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Expected value goes first so failure output labels it correctly.
			assert.Equal(t, tt.want, LatinMatchingFilter(tt.filter, tt.value, 0))
		})
	}
}

View file

@ -6,10 +6,14 @@ import (
"regexp"
"strings"
"time"
"unicode"
"github.com/cli/go-gh/v2/pkg/text"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
var whitespaceRE = regexp.MustCompile(`\s+`)
@ -72,3 +76,19 @@ func DisplayURL(urlStr string) string {
}
return u.Hostname() + u.Path
}
// RemoveDiacritics returns value with its diacritics (accent marks) removed.
// If the transformation reports an error, value is returned unchanged.
func RemoveDiacritics(value string) string {
	// Three chained steps:
	//   - NFD decomposes the text into base characters and separate marks,
	//   - every rune in the Mn ("Mark, nonspacing") category is dropped,
	//   - NFC recomposes what is left.
	stripMarks := runes.Remove(runes.In(unicode.Mn))
	chain := transform.Chain(norm.NFD, stripMarks, norm.NFC)

	if stripped, _, err := transform.String(chain, value); err == nil {
		return stripped
	}
	return value
}

View file

@ -54,3 +54,57 @@ func TestFuzzyAgoAbbr(t *testing.T) {
assert.Equal(t, expected, fuzzy)
}
}
func TestRemoveDiacritics(t *testing.T) {
tests := [][]string{
// no diacritics
{"e", "e"},
{"و", "و"},
{"И", "И"},
{"ж", "ж"},
{"私", "私"},
{"万", "万"},
// diacritics test sets
{"à", "a"},
{"é", "e"},
{"è", "e"},
{"ô", "o"},
{"ᾳ", "α"},
{"εͅ", "ε"},
{"ῃ", "η"},
{"ιͅ", "ι"},
{"ؤ", "و"},
{"ā", "a"},
{"č", "c"},
{"ģ", "g"},
{"ķ", "k"},
{"ņ", "n"},
{"š", "s"},
{"ž", "z"},
{"ŵ", "w"},
{"ŷ", "y"},
{"ä", "a"},
{"ÿ", "y"},
{"á", "a"},
{"ẁ", "w"},
{"ỳ", "y"},
{"ō", "o"},
// full words
{"Miķelis", "Mikelis"},
{"François", "Francois"},
{"žluťoučký", "zlutoucky"},
{"învățătorița", "invatatorita"},
{"Kękę przy łóżku", "Keke przy łozku"},
}
for _, tt := range tests {
t.Run(RemoveDiacritics(tt[0]), func(t *testing.T) {
assert.Equal(t, tt[1], RemoveDiacritics(tt[0]))
})
}
}

View file

@ -7,6 +7,7 @@ import (
"github.com/AlecAivazis/survey/v2"
"github.com/cli/cli/v2/api"
"github.com/cli/cli/v2/internal/ghrepo"
"github.com/cli/cli/v2/internal/prompter"
"github.com/cli/cli/v2/pkg/set"
"github.com/cli/cli/v2/pkg/surveyext"
)
@ -395,6 +396,7 @@ func multiSelectSurvey(message string, defaults, options []string) ([]string, er
Message: message,
Options: options,
Default: defaults,
Filter: prompter.LatinMatchingFilter,
}
err := survey.AskOne(q, &results)
return results, err