Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
languagesql
CREATE OR REPLACE PACKAGE PKGPCK_PHONETIK AS 
/*
|| $Header: $
||
|| Name   : PCKPKG_PHONETIK
|| Aufgabe: FunktionenAn fürimplementation dieof Umsetzung der Kölner Phonetik"Koelner Phonetik" 
|| Autor  : 13.02.2017, Jan Schreiber
||
*/
  
  FUNCTION GET_KP_STRINGCODE (
      I_STRING                IN VARCHAR2)
    RETURN VARCHAR2;
END PKGPCK_PHONETIK;
/

CREATE OR REPLACE PACKAGE BODY PKGPCK_PHONETIK AS
/*------------------------------------------------------------------------------
||
|| 2017.02.13 Jan Schreiber: An implementation of "Koelner Phonetik".
|| 2017.02.14 Jan Schreiber: Renamed to PCK_PHONETIK
|| 2917.02.16 Jan Schreiber: Aligned with Java class ColognePhonetic 
||
*/------------------------------------------------------------------------------
/*
 FUNCTION GET_KP_STRING (
  This work is based on Andy Theilers github package: https://github.com/deezaster/germanphonetic
  and Carsten Czarskis APEX implementation: https://apex.oracle.com/pls/apex/germancommunities/apexcommunity/tipp/1502/index.html
  and the Java Commons 1.7 implementation ColognePhonetics from: http://archive.apache.org/dist/commons/codec/source/commons-codec-1.7-src.tar.gz
    
  Source code: https://wiki.loopback.org/confluence/x/HoDR
*/
  function soundex_ger (strword IN VARCHAR2,
    i_string                    intlen IN VARCHAR2)NUMBER DEFAULT 255)
  RETURNreturn VARCHAR2 IS is
 l_word 
     word varchar2(32767255);
  l_code   wordlen number;
 varchar2(32767) := ''   checklen number;
  l_codechar   code varchar2(2255);
:= '';   l_nextchar phoneticcode charvarchar2(1 CHAR255);
  l_prevchar  char(1 CHAR) intX number;
  l_currchar 
char(1 CHAR);  begin
l_lastpos   pls_integer;
     BEGIN   if intlen is null 
l_word := upper(i_string)        then 
          checklen := 255; 
   l_lastpos      else 
          checklen := length(l_word)intlen; 
   for i in 1..l_lastpos loop end if;
   
l_currchar         Word := lower(substr(l_word, i, 1)strWord,0,checkLen));
        if ilength(Word) < l_lastpos1 then return 0; end if;
    l_nextchar     Word := substr(l_word, i + 1, 1);REGEXP_REPLACE(
        REPLACE(
          REPLACE(
            Translate(Word, 'vwjyäöüéèêàáç', 'ffiiaoueeeaac'),
      else         l_nextchar := 'ph', '_f';),
      end if;       if i > 1 then'ß', 'ss'),
           l_prevchar  '[^a-zA-Z]', Null);
        wordlen := substrlength(l_word,);
i - 1, 1);      if wordlen = 1 
     else   then 
    l_prevchar       word := word || '_ ' ; 
        end if;
   
  if l_currchar in ('A','E','I','J','O','U','Y','-','_','+       if substr(word,1,1) = 'c' 
         then
           case
             when substr(word,2,1) in ('a','h','k','l','o','q','r','u','x') then
        l_codechar         Code := '04';
      elsif l_currchar       else
                Code := 'B8';
           end case;
           intX := 2;
        else
           intX := 1;
        end if;
   
        while intX <= wordlen loop
           case
             when substr(word,intX,1) in ('a','e','i','o','u') then
        l_codechar         code := code || '10';
      elsif l_currchar       when substr(word,intX,1) = 'Pb' andor not l_nextchar substr(Word,intX,1) = 'Hp' then
        l_codechar         code := code || '1';
      elsif l_currchar in     when substr('D','T')word,intX,1) = 'd' or  substr(Word,intX,1) = 't' then
                if notintX l_nextchar < wordlen then
                   case
                     when substr(word,intX+1,1) in ('Cc','Ss','?z','Z') then
          l_codechar               code := code || '28';
                     else
          l_codechar               code := code || '82';
                   end ifcase;
      elsif l_currchar in ('F','V','W') or (l_currchar = 'P' and l_nextchar = 'H')     else
                   code := code || '2';
                end if;
             when substr(word,intX,1) = 'f' then
        l_codechar         code := code || '3';
      elsif l_currchar       when substr(word,intX,1) in ('Gg','Kk','Qq') then
        l_codechar         code := code || '4';
      elsif l_currchar ='C      when substr(word,intX,1) = 'c' then
                if iintX =< 1wordlen then
          if l_nextchar         case
                     when substr(Word,intX+1,1) in ('Aa','Hh', 'Kk','Lo', 'Oq', 'Qu', 'R', 'U', 'X')x') then
                        case
                          when substr(Word,intX-1,1) = 's' or substr(Word,intX-1,1) = 'z' then
            l_codechar                  code := code || '48';
                          else
            l_codechar                  code := code || '84';
                        end ifcase;
        else           if l_nextchar in ('A','H','K','O','Q','U','X') and not l_prevchar in ('ß','S','Z') thenelse
                   l_codechar      code := code || '48';
                   end case;
                else
            l_codechar        code := code || '8';
                end if;
        end if;    when   elsif l_currchar substr(word,intX,1) = 'Xx' then
                if l_prevchar intX > 1 then
                   case
                     when substr(word,intX-1,1) in ('Cc','Kk','Qx') then
          l_codechar               code := code || '8';
                     else
          l_codechar               code := code || '48';
                   end case;
                else
                   code := code || '48';
                end if;
      elsif l_currchar       when substr(word,intX,1) = 'Ll' then
        l_codechar         code := code || '5';
      elsif l_currchar in     when substr('M','N')word,intX,1) = 'm' or substr(word,intX,1) = 'n' then
        l_codechar         code := code || '6';
      elsif l_currchar       when substr(word,intX,1) = 'Rr' then
        l_codechar         code := code || '7';
      elsif l_currchar in ('S','Z','ß')    when substr(word,intX,1) = 's' or substr(word,intX,1) = 'z' then
        l_codechar         code := code || '8';
      end if       else
                code := code;
           end case;
   
  if l_code is null then
         intX := intX + 1;
        end loop;
      
        phoneticcode := translate(regexp_replace(code, '(.)\1+', '\1'), '1234567890', '123456789');
       l_code := l_code || l_codechar 
       if  substr(code,1,1) = '0' 
       then 
        phoneticcode := '0' || phoneticcode;  
       end if;
      return phoneticcode;
   
   end soundex_ger;
   
  else function get_code 
    (i_string ifIN not l_codechar = 0 and not substr(l_code, length(l_code), 1) = l_codechar VARCHAR2) 
    return VARCHAR2 
    is
     len number;
     i number;
     k number;
     in_string varchar2(4000);
     out_string varchar2(4000);
     key_length constant number := 4;
   begin
    i := 1;
    out_string := null ;
    in_string := regexp_replace(substr(translate(i_string,'.,-;','    '),1,4000),  '([[:cntrl:]])|(^\t)', ' ');   
    len := length(in_string) ;
    while i <= len loop
      k := InStr(in_string, ' ', i);
      case
         when (k = i) then 
          i := i +1;
         when (k > i) then
          out_string := trim(out_string || ' ' || substr(soundex_ger(substr(in_string, i, k-i)),1,key_length)) ;
          i := k+1 ;
         when (k = 0) then
          lout_codestring := ltrim(out_codestring || l_codechar' ' || substr(soundex_ger(substr(in_string, i)),1,key_length)) ;
        end if  i := len +1;
      end ifcase;
    end loop;
    RETURNreturn lout_codestring;
  END GET_KP_STRINGend get_code;
END PKGPCK_PHONETIK;
/

 

The implementation table used is build on the article in German Wikipedia mentioned above:

LetterContextCode
A, E, I, J, O, U, Y 0
H -
B 1
Pnot before H1
D, Tnot before C, S, Z2
F, V, W 3
Pbefore H3
G, K, Q 4
Cin Anlaut before A, H, K, L, O, Q, R, U, X4
Cbefore A, H, K, O, Q, U, X , but not behind S, Z4
Xnot behind C, K, Q48
L 5
M, N 6
R 7
S, Z 8
Cbehind S, Z8
Cin Anlaut, but not before A, H, K, L, O, Q, R, U, X8
Cnot before A, H, K, O, Q, U, X8
D, Tbefore C, S, Z8
Xafter C, K, Q8