% Performs arithmetic encoding and decoding

% Start from an empty workspace and show doubles at full precision
clear all
format long

% Symbol model
% ============
% Exactly one "probabilities" assignment should be active. The entries do not
% need to sum to one because they are normalised further down.

% Simple four-symbol test
% probabilities = [0.1 0.2 0.3 0.4];

% Letters        A      B      C      D      E      F      G      H      I      J      K      L      M      N      O      P      Q      R      S      T      U      V      W      X      Y      Z
probabilities = [0.0856 0.0139 0.0279 0.0378 0.1304 0.0289 0.0199 0.0528 0.0627 0.0013 0.0042 0.0339 0.0249 0.0707 0.0797 0.0199 0.0012 0.0677 0.0607 0.1045 0.0249 0.0092 0.0149 0.0017 0.0199 0.0008];

% Max-Lloyd quantiser of Gaussian distributed samples having zero mean and unity variance
% 2 levels:      -0.7970    0.7970
%probabilities = [ 0.5000    0.5000];

% 3 levels:      -1.2242         0    1.2242
%probabilities = [ 0.2706    0.4588    0.2706];

% 4 levels:      -1.5103   -0.4520    0.4520    1.5103
%probabilities = [ 0.1634    0.3366    0.3366    0.1634];

% 5 levels:      -1.7228   -0.7633         0    0.7633    1.7228
%probabilities = [ 0.1071    0.2439    0.2979    0.2439    0.1071];

% 6 levels:      -1.8941   -1.0011   -0.3182    0.3182    1.0011    1.8941
%probabilities = [ 0.0735    0.1813    0.2452    0.2452    0.1813    0.0735];

% 7 levels:      -2.0366   -1.1893   -0.5613         0    0.5613    1.1893    2.0366
%probabilities = [ 0.0534    0.1373    0.1992    0.2202    0.1992    0.1373    0.0534];

% 8 levels:      -2.1491   -1.3421   -0.7536   -0.2441    0.2441    0.7536    1.3421    2.1491
%probabilities = [ 0.0406    0.1070    0.1611    0.1913    0.1913    0.1611    0.1070    0.0406];

% Sum of two dice   2  3  4  5  6  7  8  9 10 11 12
%probabilities =    [1  2  3  4  5  6  5  4  3  2  1]/36;

% Rescale the model so that the probabilities sum to one
probabilities = probabilities ./ sum(probabilities);

% Cumulative table with a leading zero: the k-th symbol owns the span from
% cumulative_probabilities(k) to cumulative_probabilities(k+1)
cumulative_probabilities = cat(2, 0, cumsum(probabilities));

% Encoder
% =======

% Sequence to encode, given as 1-based indices into "probabilities"
symbols = [8 5 12 12 15 23 15 18 12 4]; %helloworld

% The working interval starts out as the whole of 0-1
lower = 0;
upper = 1;
% Shrink the interval once per symbol
for symbol_index = 1:numel(symbols)
    % Width of the interval before this symbol is applied
    difference = upper - lower;

    % Show how the current interval is divided between the symbol values
    % (deliberately left without a semicolon so that it is printed)
    lower + difference*cumulative_probabilities

    % Keep only the sub-interval owned by this symbol. "upper" is updated
    % first because both expressions need the old value of "lower".
    upper = lower + difference*cumulative_probabilities(1 + symbols(symbol_index));
    lower = lower + difference*cumulative_probabilities(symbols(symbol_index));
end

% Find a short binary fraction that identifies the final interval
% ---------------------------------------------------------------
% Reads:  lower, upper (final interval from the encoder loop) and symbols.
% Writes: bits  - row vector of 0/1; bit k has a weight of 0.5^k
%         value - the fraction represented by bits
%         power - one more than the number of bits produced

% An empty interval cannot be identified by any value. This happens when a
% symbol having zero probability is encoded, or when the sequence is too long
% for double precision to resolve.
if ~(upper > lower)
    error('arithmetic_coding:emptyInterval', ...
        'The final interval [%g, %g] is empty, so the symbols cannot be encoded.', lower, upper);
end

% The decoder picks a symbol using a strict "value > boundary" comparison, so
% a value lying exactly on "lower" would be decoded as the preceding symbol
% value. The search must therefore continue until the value is strictly above
% lower. The only exception is a sequence made up entirely of symbol 1: then
% lower is 0 and the decoder maps a value of 0 to symbol 1, so no bits are
% needed.
must_exceed_lower = any(symbols ~= 1);

% Start from a value of 0
value = 0;
% Power of 0.5 to consider
power = 1;
% Initialise an empty bit sequence
bits = [];
% Greedily add powers of 0.5 without ever exceeding upper, until the value
% has entered the identified range
while value < lower || (must_exceed_lower && value == lower)
    step = 0.5^power;
    % If the next power of 0.5 keeps the value at or below upper...
    if value + step <= upper
        % ...add this to our value
        value = value + step;
        % Concatenate a 1 onto our bit sequence
        bits = [bits,1];
    else
        % Concatenate a 0 onto our bit sequence
        bits = [bits,0];
    end
    % Move on to the next power of 0.5
    power = power + 1;
end


% Display the resultant bit sequence
bits

% Decoder
% =======

% Turn the bit sequence back into a fraction: bit k has a weight of 0.5^k
% (left without a semicolon so that the value is printed)
value = sum(0.5.^(1:numel(bits)).*bits)

% Storage for the decoded sequence. The decoder is assumed to know how many
% symbols were encoded.
recovered_symbols = zeros(1,numel(symbols));

% Before any symbol is decoded, the boundaries are the cumulative probabilities
ranges = cumulative_probabilities;

% Decode one symbol per pass
for symbol_index = 1:numel(recovered_symbols)
    % The symbol is the first one whose upper boundary is not below the value.
    % A value lying exactly on a boundary goes to the lower-indexed symbol.
    recovered_symbols(symbol_index) = find(value <= ranges(2:end), 1, 'first');

    % Subdivide the interval of the decoded symbol in proportion to the
    % symbol probabilities, ready for the next pass
    lower = ranges(recovered_symbols(symbol_index));
    upper = ranges(recovered_symbols(symbol_index)+1);
    difference = upper - lower;
    ranges = lower + difference*cumulative_probabilities;
end

% Show the decoded sequence
recovered_symbols