function [result, K_p] = blended_spectrum_fast(s,t,p,lambda)
%BLENDED_SPECTRUM_FAST
% -Finds the contiguous subsequence match count between strings s and t
%  by using a dynamic programming implementation,
%  for all substrings of length <= p, and with penalties lambda.
%  *(There is also a brute force implementation of this algorithm.
%  Type help blended_spectrum_fast_bf for info.)
% -Is faster than blended_spectrum b/c this program only tries to fill in
%  the last index of the matrix, rather than the whole structure.
% -Characters are compared with strcmpi, i.e. the match is case-insensitive.
%
% -Simply prompting the function will return the value K(s,t), however
%  using the function as [result,K] = blended_spectrum_fast(s,t,p,lambda)
%  will also return the matrix K. K is length(s)-by-length(t); only its last
%  entry K(end,end) holds a computed value (equal to result), every other
%  entry is left at the placeholder -1.
%
% -The following algorithm is used:
%   K[p](sa,t)   = K[p](s,t) + [Summation of j from 1 to |t|] ( lambda^2 * K'[p](sa,t(1:j)) )
%   K[p](s,t)    = 0 if |s| == 0 or |t| == 0
%   K'[p](sa,tb) = (1 + lambda^2 * K'[p-1](s,t)) [a == b]
%   K'[0](s,t)   = 0 for all s,t
%   K'[p](s,t)   = 0 if |s| == 0 or |t| == 0
%
% -Example: blended_spectrum_fast('abccc','abc', 2, 1) returns a value of 7.
%  (Note that blended_spectrum_fast('abccc','abc',2, 1) =
%   blended_spectrum_fast('abc','abccc',2, 1)
%   since K(s,t,p) = K(t,s,p) ).
% -Example: blended_spectrum_fast('a','a', 1, 1) returns a value of 1.
% -Example: blended_spectrum_fast('a','b', 1, 1) returns a value of 0.
% -Example: blended_spectrum_fast('ab','ab', 1, 1) returns a value of 2.
% -Example: blended_spectrum_fast('','abc', 2, 1) returns a value of 0
%  (empty-string base case; K is then an empty matrix).
%
%USAGE: scalar = blended_spectrum_fast('string1','string2', p, lambda);
%       (where p is the maximum length of the subsequence)
%
%       [scalar, matrix] = blended_spectrum_fast('string1','string2', p, lambda);
%
%ERRORS: raised when a non-empty s or t is not a row vector, when p is not
%        a positive integer scalar, or when lambda is not a positive scalar.
%
%For more information, visit http://www.kernel-methods.net/
%Written and tested in Matlab 6.0, Release 12.
%Copyright 2003, Manju M. Pai 4/2003
%manju@kernel-methods.net

%------------------------------------------------------------------------------------------

%Obtain lengths of strings
[num_rows_s, n] = size(s);
[num_rows_t, m] = size(t);

%Error checking statements (all done BEFORE p is used as an array size):

%Make sure non-empty input vectors are horizontal. Empty inputs are exempt
%because '' is 0-by-0 in MATLAB and is handled as the K = 0 base case below.
if (~isempty(s) & num_rows_s ~= 1) | (~isempty(t) & num_rows_t ~= 1)
    error('Error: s and t must be horizontal vectors.');
end;

%If p is not a positive number, program should quit due to faulty variable input.
if ischar(p) | prod(size(p)) ~= 1
    error('Error: p needs to be a number greater than 0.');
end;
if p <= 0
    error('Error: p needs to be a number greater than 0.');
end;
%p is used as an array dimension and as a subscript, so it must be a finite integer.
%(NaN also lands here because NaN ~= floor(NaN) is true.)
if p ~= floor(p) | isinf(p)
    error('Error: p needs to be an integer.');
end;

%If lambda is not a positive number, program should quit due to faulty variable input.
if ischar(lambda) | prod(size(lambda)) ~= 1
    error('Error: lambda needs to be a number greater than 0.');
end;
if lambda <= 0
    error('Error: lambda needs to be a number greater than 0.');
end;

%End of error checking

%Initially set every matrix index to -1 to show value has not yet been found
K_p = repmat(-1, [n, m]);    %The main kernel

%Base case K[p](s,t) = 0 if |s| == 0 or |t| == 0. K_p is empty here, so
%there is no last entry to fill in.
if isempty(s) | isempty(t)
    result = 0;
    return
end;

%Fill in the last entry of the matrix
K_p(n,m) = blended_spectrum_fast_kernel(s, t, p, lambda);
result = K_p(n,m);

%------------------------------------------------------------------------------------------

function total = blended_spectrum_fast_kernel(sa, t, p, lambda)
%This function is called by blended_spectrum_fast().
%Type 'help blended_spectrum_fast' for a description of the program.
%
%Computes K[p](sa,t) by unrolling the recurrence
%   K[p](sa,t) = K[p](s,t) + sum_j lambda^2 * K'[p](sa,t(1:j))
%into a loop over the prefixes sa(1:i), i = 1..|sa|. The terms are added in
%the same order a recursive evaluation would add them (shortest prefix
%first), but the call depth no longer grows with the length of sa.

%Obtain lengths of both strings
n = length(sa);
m = length(t);

%Suffix-kernel memo table: K_s_p(i,j,q) caches K'[q](sa(1:i), t(1:j));
%-1 marks "not yet computed" (real values are never negative).
K_s_p = repmat(-1, [n, m, p]);

lambda_sq = lambda * lambda;
total = 0;

for i = 1:n
    prefix_s = sa(1:i);
    for j = 1:m
        %suffix_kernel called with order q returns K'[q+1], hence (p-1) here.
        [value, K_s_p] = suffix_kernel(prefix_s, t(1:j), K_s_p, (p-1), lambda);
        K_s_p(i, j, p) = value;    %Store this newly calculated result into suffix kernel
        total = total + (lambda_sq * value);
    end;
end;

return
% End of algorithm

%------------------------------------------------------------------------------------------

function [value, K_s_p] = suffix_kernel(sa, tb, K_s_p, p, lambda)
%This function is called by blended_spectrum_fast().
%Type 'help blended_spectrum_fast' for a description of the program.
%
%Returns [a == b] * (1 + lambda^2 * K'[p](s,t)), where a and b are the last
%characters of sa and tb and s, t are sa and tb without them; i.e. the
%value returned for input order p is K'[p+1](sa,tb). K_s_p is the memo
%table, returned with any newly computed entries filled in.
%sa and tb must both be non-empty.

%Obtain lengths of both strings
n = length(sa);
m = length(tb);

%if last characters of both strings do not match (ignoring case), return 0
if ~(strcmpi( sa(n), tb(m) ))
    value = 0;
    return
end;

%Lengths of the strings with their last character truncated
length_s = n - 1;
length_t = m - 1;

%Start algorithm: K'[p+1](sa,tb) = (1 + lambda^2 * K'[p](s,t)) [a == b]
if (p == 0) | (length_s == 0) | (length_t == 0)
    %Base cases: K'[0] = 0, and K' = 0 if either truncated string is empty.
    %(Tested before the table lookup because p == 0 is not a valid subscript.)
    inner = 0;
elseif ( K_s_p( length_s, length_t, p ) == -1 )
    % Value has not yet been calculated
    [inner, K_s_p] = suffix_kernel(sa(1:length_s), tb(1:length_t), K_s_p, (p-1), lambda);
    K_s_p( length_s, length_t, p ) = inner;
else
    % Value has already been calculated
    inner = K_s_p( length_s, length_t, p );
end;

value = 1 + (lambda * lambda * inner);

return