Add word-count exercise (#253)

keiravillekode · web-flow · commit 730842a47af0 · 2025-04-07T18:46:21.000+10:00
diff --git a/config.json b/config.json
@@ -415,6 +415,14 @@
         "prerequisites": [],
         "difficulty": 3
       },
+      {
+        "slug": "word-count",
+        "name": "Word Count",
+        "uuid": "438c1838-390a-439f-964a-a6c8d1855b7e",
+        "practices": [],
+        "prerequisites": [],
+        "difficulty": 3
+      },
       {
         "slug": "affine-cipher",
         "name": "Affine Cipher",
diff --git a/exercises/practice/word-count/.docs/instructions.md b/exercises/practice/word-count/.docs/instructions.md
@@ -0,0 +1,47 @@
+# Instructions
+
+Your task is to count how many times each word occurs in a subtitle of a drama.
+
+The subtitles from these dramas use only ASCII characters.
+
+The characters often speak in casual English, using contractions like _they're_ or _it's_.
+Though these contractions come from two words (e.g. _we are_), the contraction (_we're_) is considered a single word.
+
+Words can be separated by any form of punctuation (e.g. ":", "!", or "?") or whitespace (e.g. "\t", "\n", or " ").
+The only punctuation that does not separate words is the apostrophe in contractions.
+
+Numbers are considered words.
+If the subtitles say _It costs 100 dollars._ then _100_ will be its own word.
+
+Words are case insensitive.
+For example, the word _you_ occurs three times in the following sentence:
+
+> You come back, you hear me? DO YOU HEAR ME?
+
+The ordering of the word counts in the results doesn't matter.
+
+Here's an example that incorporates several of the elements discussed above:
+
+- simple words
+- contractions
+- numbers
+- case insensitive words
+- punctuation (including apostrophes) to separate words
+- different forms of whitespace to separate words
+
+`"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.`
+
+The mapping for this subtitle would be:
+
+```text
+123: 1
+agent: 1
+cried: 1
+fled: 1
+i: 1
+password: 2
+so: 1
+special: 1
+that's: 1
+the: 2
+```
diff --git a/exercises/practice/word-count/.docs/introduction.md b/exercises/practice/word-count/.docs/introduction.md
@@ -0,0 +1,8 @@
+# Introduction
+
+You teach English as a foreign language to high school students.
+
+You've decided to base your entire curriculum on TV shows.
+You need to analyze which words are used, and how often they're repeated.
+
+This will let you choose the simplest shows to start with, and to gradually increase the difficulty as time passes.
diff --git a/exercises/practice/word-count/.meta/config.json b/exercises/practice/word-count/.meta/config.json
@@ -0,0 +1,18 @@
+{
+  "authors": [
+    "keiravillekode"
+  ],
+  "files": {
+    "solution": [
+      "word-count.v"
+    ],
+    "test": [
+      "run_test.v"
+    ],
+    "example": [
+      ".meta/example.v"
+    ]
+  },
+  "blurb": "Given a phrase, count the occurrences of each word in that phrase.",
+  "source": "This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour."
+}
diff --git a/exercises/practice/word-count/.meta/example.v b/exercises/practice/word-count/.meta/example.v
@@ -0,0 +1,45 @@
+module main
+
+// count_words reports how many times each word occurs in `sentence`.
+//
+// A word begins at a letter or digit (as judged by `is_letter`/`is_digit`
+// on each byte) and runs on through letters, digits and apostrophes.
+// Apostrophes left dangling at the end of a word, such as a closing quote,
+// are trimmed off. All other bytes only separate words. Keys in the
+// returned map are the words converted with `to_lower`.
+fn count_words(sentence string) map[string]int {
+	mut counts := map[string]int{}
+	mut start := 0
+
+	for start < sentence.len {
+		// Step over separators one byte at a time until a word begins.
+		if !sentence[start].is_letter() && !sentence[start].is_digit() {
+			start++
+			continue
+		}
+
+		// Extend the word across letters, digits and apostrophes.
+		mut stop := start + 1
+		for stop < sentence.len {
+			c := sentence[stop]
+			if c.is_letter() || c.is_digit() || c == `'` {
+				stop++
+			} else {
+				break
+			}
+		}
+
+		// Give back trailing apostrophes; sentence[start] is a letter or
+		// digit, so this loop cannot move `stop` back past `start + 1`.
+		for sentence[stop - 1] == `'` {
+			stop--
+		}
+
+		counts[sentence[start..stop].to_lower()]++
+
+		// sentence[stop], if it exists, is a separator or a trimmed apostrophe.
+		start = stop + 1
+	}
+
+	return counts
+}
diff --git a/exercises/practice/word-count/.meta/tests.toml b/exercises/practice/word-count/.meta/tests.toml
@@ -0,0 +1,50 @@
+# This is an auto-generated file. Regular comments will be removed when this
+# file is regenerated. Regenerating will not touch any manually added keys,
+# so comments can be added in a "comment" key.
+
+[61559d5f-2cad-48fb-af53-d3973a9ee9ef]
+description = "count one word"
+
+[5abd53a3-1aed-43a4-a15a-29f88c09cbbd]
+description = "count one of each word"
+
+[2a3091e5-952e-4099-9fac-8f85d9655c0e]
+description = "multiple occurrences of a word"
+
+[e81877ae-d4da-4af4-931c-d923cd621ca6]
+description = "handles cramped lists"
+
+[7349f682-9707-47c0-a9af-be56e1e7ff30]
+description = "handles expanded lists"
+
+[a514a0f2-8589-4279-8892-887f76a14c82]
+description = "ignore punctuation"
+
+[d2e5cee6-d2ec-497b-bdc9-3ebe092ce55e]
+description = "include numbers"
+
+[dac6bc6a-21ae-4954-945d-d7f716392dbf]
+description = "normalize case"
+
+[4185a902-bdb0-4074-864c-f416e42a0f19]
+description = "with apostrophes"
+include = false
+
+[4ff6c7d7-fcfc-43ef-b8e7-34ff1837a2d3]
+description = "with apostrophes"
+reimplements = "4185a902-bdb0-4074-864c-f416e42a0f19"
+
+[be72af2b-8afe-4337-b151-b297202e4a7b]
+description = "with quotations"
+
+[8d6815fe-8a51-4a65-96f9-2fb3f6dc6ed6]
+description = "substrings from the beginning"
+
+[c5f4ef26-f3f7-4725-b314-855c04fb4c13]
+description = "multiple spaces not detected as a word"
+
+[50176e8a-fe8e-4f4c-b6b6-aa9cf8f20360]
+description = "alternating word separators not detected as a word"
+
+[6d00f1db-901c-4bec-9829-d20eb3044557]
+description = "quotation for word with apostrophe"
diff --git a/exercises/practice/word-count/run_test.v b/exercises/practice/word-count/run_test.v
@@ -0,0 +1,139 @@
+module main
+
+fn test_count_one_word() {
+	// a single word maps to a count of one
+	assert count_words('word') == {
+		'word': 1
+	}
+}
+
+fn test_count_one_of_each_word() {
+	// three distinct words, each seen once
+	assert count_words('one of each') == {
+		'one':  1
+		'of':   1
+		'each': 1
+	}
+}
+
+fn test_multiple_occurrences_of_a_word() {
+	// 'fish' appears four times, every other word once
+	assert count_words('one fish two fish red fish blue fish') == {
+		'one':  1
+		'fish': 4
+		'two':  1
+		'red':  1
+		'blue': 1
+	}
+}
+
+fn test_handles_cramped_lists() {
+	// commas with no surrounding spaces still separate words
+	assert count_words('one,two,three') == {
+		'one':   1
+		'two':   1
+		'three': 1
+	}
+}
+
+fn test_handles_expanded_lists() {
+	// a comma followed by a newline separates words too
+	assert count_words('one,\ntwo,\nthree') == {
+		'one':   1
+		'two':   1
+		'three': 1
+	}
+}
+
+fn test_ignore_punctuation() {
+	// colons and the trailing run of symbols are not counted
+	assert count_words('car: carpet as java: javascript!!&@$%^&') == {
+		'car':        1
+		'carpet':     1
+		'as':         1
+		'java':       1
+		'javascript': 1
+	}
+}
+
+fn test_include_numbers() {
+	// digits form words of their own
+	assert count_words('testing, 1, 2 testing') == {
+		'testing': 2
+		'1':       1
+		'2':       1
+	}
+}
+
+fn test_normalize_case() {
+	// differently cased spellings share one lowercase key
+	assert count_words('go Go GO Stop stop') == {
+		'go':   3
+		'stop': 2
+	}
+}
+
+fn test_with_apostrophes() {
+	// contractions keep their inner apostrophe; the enclosing quotes are dropped
+	assert count_words("'First: don't laugh. Then: don't cry. You're getting it.'") == {
+		'first':   1
+		"don't":   2
+		'laugh':   1
+		'then':    1
+		'cry':     1
+		"you're":  1
+		'getting': 1
+		'it':      1
+	}
+}
+
+fn test_with_quotations() {
+	// the quoted and the bare spelling of large count as the same word
+	assert count_words("Joe can't tell between 'large' and large.") == {
+		'joe':     1
+		"can't":   1
+		'tell':    1
+		'between': 1
+		'large':   2
+		'and':     1
+	}
+}
+
+fn test_substrings_from_the_beginning() {
+	// app, apple and a stay distinct words despite sharing a prefix
+	assert count_words("Joe can't tell between app, apple and a.") == {
+		'joe':     1
+		"can't":   1
+		'tell':    1
+		'between': 1
+		'app':     1
+		'apple':   1
+		'and':     1
+		'a':       1
+	}
+}
+
+fn test_multiple_spaces_not_detected_as_a_word() {
+	// leading and repeated spaces never produce an empty word
+	assert count_words(' multiple   whitespaces') == {
+		'multiple':    1
+		'whitespaces': 1
+	}
+}
+
+fn test_alternating_word_separators_not_detected_as_a_word() {
+	// mixed runs of commas, newlines, spaces and quotes yield no extra words
+	assert count_words(",\n,one,\n ,two \n 'three'") == {
+		'one':   1
+		'two':   1
+		'three': 1
+	}
+}
+
+fn test_quotation_for_word_with_apostrophe() {
+	// the quoted contraction keeps its inner apostrophe and loses the quotes
+	assert count_words("can, can't, 'can't'") == {
+		'can':   1
+		"can't": 2
+	}
+}
diff --git a/exercises/practice/word-count/word-count.v b/exercises/practice/word-count/word-count.v
@@ -0,0 +1,4 @@
+module main
+
+fn count_words(sentence string) map[string]int {
+} // stub with no body yet: .meta/config.json lists this file as the solution and .meta/example.v as the example