Skip to end of metadata
Go to start of metadata

You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 2 Next »

In a German-speaking environment, you need a phonetic search implementation that differs from Oracle's default SOUNDEX implementation.

We use the Oracle PL/SQL implementation of "Kölner Phonetik" below.

CREATE OR REPLACE PACKAGE PKG_PHONETIK AS
/*
|| $Header: $
||
|| Name   : PKG_PHONETIK
|| Purpose: Functions implementing the Cologne phonetics ("Kölner Phonetik")
|| Author : 13.02.2017, Jan Schreiber
||
*/

  -- Takes the text to encode in I_STRING and returns its phonetic code
  -- as a VARCHAR2 value.
  FUNCTION GET_KP_STRING (I_STRING IN VARCHAR2) RETURN VARCHAR2;

END PKG_PHONETIK;
/

CREATE OR REPLACE PACKAGE BODY PKG_PHONETIK AS
/*------------------------------------------------------------------------------
||
|| 2017.02.13 Jan Schreiber: An implementation of "Koelner Phonetik".
||
*/------------------------------------------------------------------------------

  /*
  || GET_KP_STRING
  ||
  || Computes the Cologne phonetics ("Kölner Phonetik") code of a string.
  ||
  || Parameters:
  ||   i_string  text to encode; case-insensitive (the text is upper-cased
  ||             before processing).
  ||
  || Returns:
  ||   The digit string of the phonetic code, or NULL when i_string is
  ||   NULL/empty or contains no codable character.
  ||
  || Algorithm:
  ||   1. Every letter is mapped to a code depending on its neighbours.
  ||      'H' and any character without a rule produce no code at all.
  ||   2. Consecutive identical digits are collapsed. The comparison is made
  ||      against the code of the previous coded letter, zeros included, so
  ||      that equal codes separated by a vowel are both kept (NENA -> 66).
  ||   3. Zeros are dropped everywhere except at the very beginning.
  */
  FUNCTION GET_KP_STRING (
      i_string                IN VARCHAR2)
    RETURN VARCHAR2 IS
  l_word       varchar2(32767);
  l_code       varchar2(32767);
  l_codechar   varchar2(2);
  l_nextchar   char(1 CHAR);
  l_prevchar   char(1 CHAR);
  l_currchar   char(1 CHAR);
  l_lastpos    pls_integer;
  -- Last digit produced by the previous coded letter (zeros included);
  -- NULL until the first coded letter has been seen.
  l_lastdigit  varchar2(1);

  BEGIN

    l_word := upper(i_string);

    -- A NULL loop bound would raise ORA-06502, so bail out early.
    if l_word is null then
      return null;
    end if;

    l_lastpos := length(l_word);
    for i in 1..l_lastpos loop
      l_currchar := substr(l_word, i, 1);
      -- '_' is used as a neutral neighbour at both ends of the word.
      if i < l_lastpos then
        l_nextchar := substr(l_word, i + 1, 1);
      else
        l_nextchar := '_';
      end if;
      if i > 1 then
        l_prevchar := substr(l_word, i - 1, 1);
      else
        l_prevchar := '_';
      end if;

      -- Reset for every character: 'H' and characters without a rule must
      -- not inherit the code of the previous letter.
      l_codechar := null;

      if l_currchar in ('A','E','I','J','O','U','Y','Ä','Ö','Ü','-','_','+') then
        -- Vowels (umlauts are treated like their base vowels) and separators.
        l_codechar := '0';
      elsif l_currchar = 'B' then
        l_codechar := '1';
      elsif l_currchar = 'P' then
        if l_nextchar = 'H' then
          l_codechar := '3';
        else
          l_codechar := '1';
        end if;
      elsif l_currchar in ('D','T') then
        if l_nextchar in ('C','S','ß','Z') then
          l_codechar := '8';
        else
          l_codechar := '2';
        end if;
      elsif l_currchar in ('F','V','W') then
        l_codechar := '3';
      elsif l_currchar in ('G','K','Q') then
        l_codechar := '4';
      elsif l_currchar = 'C' then
        if i = 1 then
          -- Initial position ("Anlaut") has its own, wider set of followers.
          if l_nextchar in ('A','H','K','L','O','Q','R','U','X') then
            l_codechar := '4';
          else
            l_codechar := '8';
          end if;
        else
          if l_nextchar in ('A','H','K','O','Q','U','X')
             and l_prevchar not in ('ß','S','Z') then
            l_codechar := '4';
          else
            l_codechar := '8';
          end if;
        end if;
      elsif l_currchar = 'X' then
        if l_prevchar in ('C','K','Q') then
          l_codechar := '8';
        else
          l_codechar := '48';
        end if;
      elsif l_currchar = 'L' then
        l_codechar := '5';
      elsif l_currchar in ('M','N') then
        l_codechar := '6';
      elsif l_currchar = 'R' then
        l_codechar := '7';
      elsif l_currchar in ('S','Z','ß') then
        l_codechar := '8';
      end if;

      if l_codechar is not null then
        -- Collapse a digit that repeats the previous letter's last digit.
        -- For the two-digit code '48' only its leading '4' can be dropped.
        if l_lastdigit is not null and substr(l_codechar, 1, 1) = l_lastdigit then
          l_codechar := substr(l_codechar, 2);
        end if;

        if l_codechar is not null then
          l_lastdigit := substr(l_codechar, -1);
          -- A zero is only kept as the very first digit of the result.
          if l_codechar <> '0' or l_code is null then
            l_code := l_code || l_codechar;
          end if;
        end if;
      end if;
    end loop;
    RETURN l_code;
  END GET_KP_STRING;
END PKG_PHONETIK;
/

 

The mapping table used by the implementation is based on the German Wikipedia article mentioned above:

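| Letter | Context | Code |
|---|---|---|
| A, E, I, J, O, U, Y | | 0 |
| H | | - |
| B | | 1 |
| P | not before H | 1 |
| D, T | not before C, S, Z | 2 |
| F, V, W | | 3 |
| P | before H | 3 |
| G, K, Q | | 4 |
| C | in initial position (Anlaut) before A, H, K, L, O, Q, R, U, X | 4 |
| C | before A, H, K, O, Q, U, X, but not after S, Z | 4 |
| X | not after C, K, Q | 48 |
| L | | 5 |
| M, N | | 6 |
| R | | 7 |
| S, Z | | 8 |
| C | after S, Z | 8 |
| C | in initial position (Anlaut), but not before A, H, K, L, O, Q, R, U, X | 8 |
| C | not before A, H, K, O, Q, U, X | 8 |
| D, T | before C, S, Z | 8 |
| X | after C, K, Q | 8 |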
  • No labels