Skip to content

Commit 34566cc

Browse files
committed
parse VTT
1 parent 216a9b1 commit 34566cc

File tree

6 files changed

+127
-21
lines changed

6 files changed

+127
-21
lines changed

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
The MIT License (MIT)
22

3-
Copyright (c) 2015-2018 Martin Lindhe
3+
Copyright (c) 2015-2019 Martin Lindhe
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,21 @@
55

66
This is a Go library and a set of command-line tools for handling .srt, .vtt and .ssa subtitles
77

8-
9-
# Installation
8+
## Installation
109

1110
Windows and macOS binaries are available under [Releases](https://github.com/martinlindhe/subtitles/releases)
1211

1312
Or install them directly from git:
1413

15-
```
14+
```sh
1615
go get -u github.com/martinlindhe/subtitles/...
1716
```
1817

19-
20-
21-
# Sub-projects
18+
## Sub-projects
2219

2320
- [subber](https://github.com/martinlindhe/subtitles/tree/master/cmd/subber) command line tool for subtitles
2421

25-
26-
# Library example - convert srt to vtt
22+
## Library example - convert srt to vtt
2723

2824
```go
2925
import "github.com/martinlindhe/subtitles"
@@ -47,8 +43,7 @@ res, _ := subtitles.NewFromSRT(in)
4743
fmt.Println(res.AsVTT())
4844
```
4945

50-
51-
# Library example - download subtitles
46+
## Library example - download subtitles
5247

5348
```go
5449
f, _ := os.Open(fileName)
@@ -58,7 +53,6 @@ finder := subtitles.NewSubFinder(f, fileName, "en")
5853
text, err := finder.TheSubDb()
5954
```
6055

61-
62-
# License
56+
## License
6357

6458
Under [MIT](LICENSE)

parser.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ func Parse(b []byte) (Subtitle, error) {
1717
return NewFromDCSub(s)
1818
} else if looksLikeSRT(s) {
1919
return NewFromSRT(s)
20+
} else if looksLikeVTT(s) {
21+
return NewFromVTT(s)
2022
}
2123
return Subtitle{}, fmt.Errorf("parse: unrecognized subtitle type")
2224
}
@@ -28,5 +30,5 @@ func LooksLikeTextSubtitle(filename string) bool {
2830
log.Fatal(err)
2931
}
3032
s := ConvertToUTF8(data)
31-
return looksLikeCCDBCapture(s) || looksLikeSSA(s) || looksLikeDCSub(s) || looksLikeSRT(s)
33+
return looksLikeCCDBCapture(s) || looksLikeSSA(s) || looksLikeDCSub(s) || looksLikeSRT(s) || looksLikeVTT(s)
3234
}

srt.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,15 @@ import (
1111

1212
// eol is the end-of-line sequence to use when writing .srt data
1313
var eol = "\n"
14+
1415
func init() {
1516
if runtime.GOOS == "windows" {
1617
eol = "\r\n"
1718
}
1819
}
1920

2021
func looksLikeSRT(s string) bool {
21-
if strings.HasPrefix(s, "1\n") || strings.HasPrefix(s, "1\r\n") {
22-
return true
23-
}
24-
return false
22+
return strings.HasPrefix(s, "1\n") || strings.HasPrefix(s, "1\r\n")
2523
}
2624

2725
// NewFromSRT parses a .srt text into Subtitle, assumes s is a clean utf8 string
@@ -36,7 +34,7 @@ func NewFromSRT(s string) (res Subtitle, err error) {
3634
continue
3735
}
3836

39-
_, err := strconv.Atoi(seq)
37+
_, err = strconv.Atoi(seq)
4038
if err != nil {
4139
err = fmt.Errorf("srt: atoi error at line %d: %v", i, err)
4240
break
@@ -52,7 +50,7 @@ func NewFromSRT(s string) (res Subtitle, err error) {
5250

5351
matches := r1.FindStringSubmatch(lines[i])
5452
if len(matches) < 3 {
55-
err = fmt.Errorf("srt: parse error at line %d (idx out of range)", i)
53+
err = fmt.Errorf("srt: parse error at line %d (idx out of range) for input '%s'", i, lines[i])
5654
break
5755
}
5856

vtt.go

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
package subtitles
22

33
import (
4+
"fmt"
5+
"regexp"
6+
"strings"
47
"time"
58
)
69

10+
var webVTTTag = "WEBVTT\n"
11+
712
// AsVTT renders the sub in WebVTT format
813
// https://en.wikipedia.org/wiki/WebVTT
914
func (subtitle *Subtitle) AsVTT() (res string) {
10-
res = "WEBVTT\n\n"
15+
res = webVTTTag + "\n"
1116
for _, sub := range subtitle.Captions {
1217
res += sub.AsVTT()
1318
}
@@ -30,3 +35,75 @@ func TimeVTT(t time.Time) string {
3035
}
3136
return t.Format("15:04:05.000")
3237
}
38+
39+
// looksLikeVTT reports whether s starts with a WebVTT signature line.
//
// The signature is the string "WEBVTT" on a line of its own. Both LF and CRLF
// line endings are accepted, and a leading UTF-8 byte order mark is ignored,
// so files written on Windows are recognised as well.
func looksLikeVTT(s string) bool {
	// A BOM is allowed in front of the signature; drop it before matching.
	s = strings.TrimPrefix(s, "\uFEFF")
	return strings.HasPrefix(s, "WEBVTT\n") || strings.HasPrefix(s, "WEBVTT\r\n")
}
42+
43+
// NewFromVTT parses a .vtt text into Subtitle, assumes s is a clean utf8 string
44+
func NewFromVTT(s string) (res Subtitle, err error) {
45+
idx := strings.Index(s, webVTTTag)
46+
if idx == -1 {
47+
return res, fmt.Errorf("not a vtt")
48+
}
49+
s = s[idx+len(webVTTTag):]
50+
51+
r1 := regexp.MustCompile("([0-9:.,]*) --> ([0-9:.,]*)")
52+
lines := strings.Split(s, "\n")
53+
outSeq := 1
54+
55+
for i := 0; i < len(lines); i++ {
56+
seq := strings.Trim(lines[i], "\r ")
57+
if seq == "" {
58+
continue
59+
}
60+
61+
var o Caption
62+
o.Seq = outSeq
63+
64+
matches := r1.FindStringSubmatch(lines[i])
65+
if len(matches) < 3 {
66+
err = fmt.Errorf("vtt: parse error at line %d (idx out of range) for input '%s'", i, lines[i])
67+
break
68+
}
69+
70+
o.Start, err = parseTime(matches[1])
71+
if err != nil {
72+
err = fmt.Errorf("vtt: start error at line %d: %v", i, err)
73+
break
74+
}
75+
76+
o.End, err = parseTime(matches[2])
77+
if err != nil {
78+
err = fmt.Errorf("vtt: end error at line %d: %v", i, err)
79+
break
80+
}
81+
82+
i++
83+
if i >= len(lines) {
84+
break
85+
}
86+
87+
textLine := 1
88+
for {
89+
line := strings.Trim(lines[i], "\r ")
90+
if line == "" && textLine > 1 {
91+
break
92+
}
93+
if line != "" {
94+
o.Text = append(o.Text, line)
95+
}
96+
i++
97+
if i >= len(lines) {
98+
break
99+
}
100+
textLine++
101+
}
102+
103+
if len(o.Text) > 0 {
104+
res.Captions = append(res.Captions, o)
105+
outSeq++
106+
}
107+
}
108+
return
109+
}

vtt_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,39 @@ import (
77
"github.com/stretchr/testify/assert"
88
)
99

10+
func TestParseVTT(t *testing.T) {
11+
txt := "WEBVTT\n" +
12+
"\n" +
13+
"00:00:00.000 --> 00:00:05.560\n" +
14+
"I Vetenskapens värld: ett experiment\n" +
15+
"jag aldrig kommer att glömma.\n" +
16+
"\n" +
17+
"00:00:10.840 --> 00:00:15.760\n" +
18+
"Om en person får kämpa\n" +
19+
"för att hålla sig vaken–\n"
20+
21+
res, err := NewFromVTT(txt)
22+
assert.Equal(t, nil, err)
23+
24+
expected := Subtitle{
25+
[]Caption{{
26+
1,
27+
makeTime(0, 0, 0, 0),
28+
makeTime(0, 0, 5, 560),
29+
[]string{
30+
"I Vetenskapens värld: ett experiment",
31+
"jag aldrig kommer att glömma.",
32+
}}, {
33+
2,
34+
makeTime(0, 0, 10, 840),
35+
makeTime(0, 0, 15, 760),
36+
[]string{
37+
"Om en person får kämpa",
38+
"för att hålla sig vaken–",
39+
}}}}
40+
assert.Equal(t, expected, res)
41+
}
42+
1043
func TestAsVTT(t *testing.T) {
1144
expected := "WEBVTT\n" +
1245
"\n" +
@@ -16,6 +49,8 @@ func TestAsVTT(t *testing.T) {
1649
"01:09.630 --> 01:11.005\n" +
1750
"No ninja!\n\n"
1851

52+
assert.Equal(t, true, looksLikeVTT(expected))
53+
1954
in := Subtitle{[]Caption{{
2055
1,
2156
makeTime(0, 0, 4, 630),

0 commit comments

Comments
 (0)