INSTALL splink_udfs FROM community;
LOAD splink_udfs;
CREATE OR REPLACE TEMP TABLE os_addresses(
uprn BIGINT,
addr VARCHAR,
postcode_group INTEGER
);
INSERT INTO os_addresses VALUES
(1,'1 RAINBOW ROAD ABBOTS LANGLEY',2),
(2,'2 RAINBOW ROAD ABBOTS LANGLEY',2),
(3,'3 RAINBOW ROAD ABBOTS LANGLEY',2),
(4,'4 RAINBOW ROAD ABBOTS LANGLEY',2),
(5,'5 RAINBOW ROAD ABBOTS LANGLEY',2),
(6,'6 RAINBOW ROAD ABBOTS LANGLEY',2),
(7,'7 RAINBOW ROAD ABBOTS LANGLEY',2),
(8,'ANNEX 7 RAINBOW ROAD ABBOTS LANGLEY',2),
(9,'9 RAINBOW ROAD ABBOTS LANGLEY',2),
(10,'10 RAINBOW ROAD ABBOTS LANGLEY',2),
(11,'11 RAINBOW ROAD ABBOTS LANGLEY',2),
(12,'12 RAINBOW ROAD ABBOTS LANGLEY',2),
(13,'13 RAINBOW ROAD ABBOTS LANGLEY',2),
(14,'14 RAINBOW ROAD ABBOTS LANGLEY',2),
(15,'15 RAINBOW ROAD ABBOTS LANGLEY',2),
(16,'16 RAINBOW ROAD ABBOTS LANGLEY',2),
(17,'17 RAINBOW ROAD ABBOTS LANGLEY',2),
(18,'18 RAINBOW ROAD ABBOTS LANGLEY',2),
(19,'19 RAINBOW ROAD ABBOTS LANGLEY',2),
(20,'20 RAINBOW ROAD ABBOTS LANGLEY',2),
(21,'21 RAINBOW ROAD ABBOTS LANGLEY',2),
(22,'22 RAINBOW ROAD ABBOTS LANGLEY',2),
(23,'MY LONG BUSINESS NAME 45 RAINBOW ROAD ABBOTS LANGLEY',2),
(34,'SOME OTHER ADDRESS 1 HIGH STREET',1);
CREATE OR REPLACE TEMP TABLE trie AS
WITH toks AS (
SELECT
postcode_group,
uprn,
string_split(regexp_replace(addr, '\\s+', ' '), ' ') AS t
FROM os_addresses
)
SELECT
postcode_group,
build_suffix_trie(uprn, t) AS trie
FROM toks
GROUP BY postcode_group;
WITH messy_addresses AS (
SELECT
2 AS postcode_group,
string_split(regexp_replace('MY BUSINESS 22 RAINBOW ROAD ABBOTS LANGLEY', '\\s+', ' '), ' ') AS tokens
)
SELECT
find_address(string_split('MY BUSINESS 22 RAINBOW ROAD ABBOTS LANGLEY HERTS', ' '), r.trie) AS uprn,
FROM messy_addresses m
JOIN trie r USING (postcode_group);