Perl logo (Thanks to Olaf Alders)

The weekly challenge 365 - Task 2: Valid Token Counter

  1 #!/usr/bin/env perl
  2 # https://theweeklychallenge.org/blog/perl-weekly-challenge-365/#TASK2
  3 #
  4 # Task 2: Valid Token Counter
  5 # ===========================
  6 #
  7 # You are given a sentence.
  8 #
  9 # Write a script to split the given sentence into space-separated tokens and
 10 # count how many are valid words. A token is valid if it contains no digits,
 11 # has at most one hyphen surrounded by lowercase letters, and at most one
 12 # punctuation mark (!, ., ,) appearing only at the end.
 13 #
 14 ## Example 1
 15 ##
 16 ## Input: $str = "cat and dog"
 17 ## Output: 3
 18 ##
 19 ## Tokens: "cat", "and", "dog"
 20 #
 21 ## Example 2
 22 ##
 23 ## Input: $str = "a-b c! d,e"
 24 ## Output: 2
 25 ##
 26 ## Tokens: "a-b", "c!", "d,e"
 27 ## "a-b" -> valid (one hyphen between letters)
 28 ## "c!"  -> valid (punctuation at end)
 29 ## "d,e" -> invalid (punctuation not at end)
 30 #
 31 ## Example 3
 32 ##
 33 ## Input: $str = "hello-world! this is fun"
 34 ## Output: 4
 35 ##
 36 ## Tokens: "hello-world!", "this", "is", "fun"
 37 ## All satisfy the rules.
 38 #
 39 ## Example 4
 40 ##
 41 ## Input: $str = "ab- cd-ef gh- ij!"
 42 ## Output: 2
 43 ##
 44 ## Tokens: "ab-", "cd-ef", "gh-", "ij!"
 45 ## "ab-"   -> invalid (hyphen not surrounded by letters)
 46 ## "cd-ef" -> valid
 47 ## "gh-"   -> invalid
 48 ## "ij!"   -> valid
 49 #
 50 ## Example 5
 51 ##
 52 ## Input: $str = "wow! a-b-c nice."
 53 ## Output: 2
 54 ##
 55 ## Tokens: "wow!", "a-b-c", "nice."
 56 ## "wow!"  -> valid
 57 ## "a-b-c" -> invalid (more than one hyphen)
 58 ## "nice." -> valid
 59 #
 60 ############################################################
 61 ##
 62 ## discussion
 63 ##
 64 ############################################################
 65 #
 66 # We split $str into the tokens and check each token. To check
 67 # each token, we encode the rules into a function which returns 0
 68 # if the token is invalid and 1 if it is valid. That way, we can
 69 # just add up all the results of is_valid() to obtain the final
 70 # result.
 71 
 72 use v5.36;
 73 
 # Run valid_token_counter() once for each example sentence from the task
 # description, in the order the examples are listed there.
 valid_token_counter($_) for (
     "cat and dog",
     "a-b c! d,e",
     "hello-world! this is fun",
     "ab- cd-ef gh- ij!",
     "wow! a-b-c nice.",
 );
 79 
 # Print the given sentence followed by the number of valid tokens in it.
 #
 # The sentence is cut into tokens at runs of whitespace and every token is
 # judged by is_valid(). is_valid() answers with 0 or 1 only, so counting
 # the tokens it accepts gives the same number as adding up its results.
 #
 # Parameters:
 #   $str - the sentence to examine
 #
 # Output: two lines on STDOUT, 'Input: "..."' and 'Output: <count>'.
 sub valid_token_counter($str) {
     say "Input: \"$str\"";

     # grep in scalar context yields the number of accepted tokens.
     my $valid = grep { is_valid($_) } split /\s+/, $str;

     say "Output: $valid";
 }
 89 
 # Decide whether a single token is a valid word.
 #
 # A token is valid when all of the following hold:
 #   * it contains no digit,
 #   * it contains at most one of the punctuation marks "!", "," and ".",
 #     and that mark is the very last character,
 #   * it contains at most one hyphen, and that hyphen sits directly
 #     between two lowercase ASCII letters (a-z),
 #   * it is non-empty and made up only of word characters, the three
 #     punctuation marks and the hyphen.
 #
 # Parameters:
 #   $token - the token to examine
 #
 # Returns 1 for a valid token and 0 for an invalid one, so that callers
 # can simply add the results up.
 sub is_valid($token) {
     return 0 if $token =~ m/\d/;

     # Anything following a punctuation mark means the mark is not at the
     # end of the token (or that there is a second mark after it).
     return 0 if $token =~ m/[!,.]./;

     # More than one hyphen.
     return 0 if $token =~ m/-.*-/;

     # At this point there is at most one hyphen. Require lowercase letters
     # on both sides with an allow-list: a deny-list of "bad" neighbours
     # (uppercase, non-word characters, start/end of token) would still let
     # through other word characters such as "_" or non-ASCII letters.
     return 0 if $token =~ m/-/ && $token !~ m/[a-z]-[a-z]/;

     # \A and \z anchor to the real start and end of the string; "$" would
     # also match just before a trailing newline.
     return 1 if $token =~ m/\A[\w!,.-]+\z/;
     return 0;
 }