1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package main
23
24 import (
25 "bufio"
26 "bytes"
27 "errors"
28 "fmt"
29 "go/format"
30 "io/ioutil"
31 "log"
32 "net/http"
33 "os"
34 "regexp"
35 "sort"
36 "strconv"
37 "strings"
38 "time"
39 )
40
41
42
// Locations of the Unicode data files read by this generator.
const (
	// propertyURL is a fmt template. Its single %s verb is replaced with the
	// name of the property file to download (without the ".txt" extension).
	propertyURL = "https://www.unicode.org/Public/15.0.0/ucd/%s.txt"

	// emojiURL is the location of the emoji property data file.
	emojiURL = "https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt"
)
47
48
// propertyPattern matches one data line of a Unicode property file, i.e. a
// line of the form
//
//	<first>[..<last>] ; <property> # <comment>
//
// where <first> and <last> are upper-case hexadecimal code points of four to
// six digits. The submatches used by callers are 1 (first code point),
// 3 (last code point, empty for a single code point), 4 (property name), and
// 5 (comment).
var propertyPattern = regexp.MustCompile(
	`^([0-9A-F]{4,6})` + // Group 1: first code point.
		`(\.\.([0-9A-F]{4,6}))?` + // Group 2: optional "..last"; group 3: last code point.
		`\s*;\s*` + // Field separator.
		`([A-Za-z0-9_]+)` + // Group 4: property name.
		`\s*#\s` + // Start of the comment; one whitespace character must follow "#".
		`(.+)$`, // Group 5: comment text.
)
50
51 func main() {
52 if len(os.Args) < 5 {
53 fmt.Println("Not enough arguments, see code for details")
54 os.Exit(1)
55 }
56
57 log.SetPrefix("gen_properties (" + os.Args[4] + "): ")
58 log.SetFlags(0)
59
60
61 flags := make(map[string]string)
62 if len(os.Args) >= 6 {
63 for _, flag := range strings.Split(os.Args[5], ",") {
64 flagFields := strings.Split(flag, "=")
65 if len(flagFields) == 1 {
66 flags[flagFields[0]] = "yes"
67 } else {
68 flags[flagFields[0]] = flagFields[1]
69 }
70 }
71 }
72
73
74 _, includeGeneralCategory := flags["gencat"]
75 var mainURL string
76 if os.Args[1] != "-" {
77 mainURL = fmt.Sprintf(propertyURL, os.Args[1])
78 }
79 src, err := parse(mainURL, flags["emojis"], includeGeneralCategory)
80 if err != nil {
81 log.Fatal(err)
82 }
83
84
85 formatted, err := format.Source([]byte(src))
86 if err != nil {
87 log.Fatal("gofmt:", err)
88 }
89
90
91 log.Print("Writing to ", os.Args[2])
92 if err := ioutil.WriteFile(os.Args[2], formatted, 0644); err != nil {
93 log.Fatal(err)
94 }
95 }
96
97
98
99
100
101
102
103
104 func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (string, error) {
105 if propertyURL == "" && emojiProperty == "" {
106 return "", errors.New("no properties to parse")
107 }
108
109
110 var properties [][4]string
111
112
113 if propertyURL != "" {
114 log.Printf("Parsing %s", propertyURL)
115 res, err := http.Get(propertyURL)
116 if err != nil {
117 return "", err
118 }
119 in1 := res.Body
120 defer in1.Close()
121
122
123 scanner := bufio.NewScanner(in1)
124 num := 0
125 for scanner.Scan() {
126 num++
127 line := strings.TrimSpace(scanner.Text())
128
129
130 if strings.HasPrefix(line, "#") || line == "" {
131 continue
132 }
133
134
135 from, to, property, comment, err := parseProperty(line)
136 if err != nil {
137 return "", fmt.Errorf("%s line %d: %v", os.Args[4], num, err)
138 }
139 properties = append(properties, [4]string{from, to, property, comment})
140 }
141 if err := scanner.Err(); err != nil {
142 return "", err
143 }
144 }
145
146
147 if emojiProperty != "" {
148 log.Printf("Parsing %s", emojiURL)
149 res, err := http.Get(emojiURL)
150 if err != nil {
151 return "", err
152 }
153 in2 := res.Body
154 defer in2.Close()
155
156
157 scanner := bufio.NewScanner(in2)
158 num := 0
159 for scanner.Scan() {
160 num++
161 line := scanner.Text()
162
163
164
165 if strings.HasPrefix(line, "#") || line == "" || !strings.Contains(line, emojiProperty) {
166 continue
167 }
168
169
170 from, to, property, comment, err := parseProperty(line)
171 if err != nil {
172 return "", fmt.Errorf("emojis line %d: %v", num, err)
173 }
174 properties = append(properties, [4]string{from, to, property, comment})
175 }
176 if err := scanner.Err(); err != nil {
177 return "", err
178 }
179 }
180
181
182 if len(properties) >= 1<<31 {
183 return "", errors.New("too many properties")
184 }
185
186
187 sort.Slice(properties, func(i, j int) bool {
188 left, _ := strconv.ParseUint(properties[i][0], 16, 64)
189 right, _ := strconv.ParseUint(properties[j][0], 16, 64)
190 return left < right
191 })
192
193
194 var (
195 buf bytes.Buffer
196 emojiComment string
197 )
198 columns := 3
199 if includeGeneralCategory {
200 columns = 4
201 }
202 if emojiURL != "" {
203 emojiComment = `
204 // and
205 // ` + emojiURL + `
206 // ("Extended_Pictographic" only)`
207 }
208 buf.WriteString(`// Code generated via go generate from gen_properties.go. DO NOT EDIT.
209
210 package uniseg
211
212 // ` + os.Args[3] + ` are taken from
213 // ` + propertyURL + emojiComment + `
214 // on ` + time.Now().Format("January 2, 2006") + `. See https://www.unicode.org/license.html for the Unicode
215 // license agreement.
216 var ` + os.Args[3] + ` = [][` + strconv.Itoa(columns) + `]int{
217 `)
218
219
220 for _, prop := range properties {
221 if includeGeneralCategory {
222 generalCategory := "gc" + prop[3][:2]
223 if generalCategory == "gcL&" {
224 generalCategory = "gcLC"
225 }
226 prop[3] = prop[3][3:]
227 fmt.Fprintf(&buf, "{0x%s,0x%s,%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), generalCategory, prop[3])
228 } else {
229 fmt.Fprintf(&buf, "{0x%s,0x%s,%s}, // %s\n", prop[0], prop[1], translateProperty("pr", prop[2]), prop[3])
230 }
231 }
232
233
234 buf.WriteString("}")
235
236 return buf.String(), nil
237 }
238
239
240
241 func parseProperty(line string) (from, to, property, comment string, err error) {
242 fields := propertyPattern.FindStringSubmatch(line)
243 if fields == nil {
244 err = errors.New("no property found")
245 return
246 }
247 from = fields[1]
248 to = fields[3]
249 if to == "" {
250 to = from
251 }
252 property = fields[4]
253 comment = fields[5]
254 return
255 }
256
257
258
// translateProperty turns a Unicode property name into the name of a Go
// identifier by removing all underscores and prepending prefix.
func translateProperty(prefix, property string) string {
	// Splitting at every underscore and joining the pieces without a
	// separator drops the underscores.
	words := strings.Split(property, "_")
	return prefix + strings.Join(words, "")
}
262
View as plain text