% This file simulates the type score distributions as in the actual model and generates Figure 1 and Table A6.

%% Setup

% --- Life-cycle timing ---
periodlength = 3;               % years covered by one model period
startage     = 20;              % age (in years) at which work starts
periods      = 15;              % number of working periods simulated

% --- Plotting ---
plot_period = [2, 8, 14];       % periods shown in the histogram figure

% --- Simulation size ---
nindividual = 1e7;              % number of simulated individuals per type

% --- Type-score grid ---
nbins = 21;                     % number of type score bins in the model solution

% --- Education groups ---
eduGroups = {'hs', 'col'};      % labels of the education groups
edu_size  = [0.62, 0.38];       % population shares: non-college / college
shareCol  = zeros(1, 2);        % share college in each education group

% Loop through education groups.
% For each group this loop (1) loads the benchmark transitory-shock
% probabilities and the prior from the matching experiment folder,
% (2) simulates discretized Bayesian type-score paths for both types, and
% (3) tabulates the simulated scores into counts / histograms.
% Requires experimentFolders and runDir to exist in the workspace.
for ee = 1:length(eduGroups)

%% Load benchmark probabilities for transitory income shock

    edu = eduGroups{ee};
    PR = [];            % shock probabilities used to draw the rational type's shocks
    PG = PR;            % shock probabilities used to draw the behavioral type's shocks
    prior_prob = PR;    % initial type score (probability of the rational type)

    % Use the first experiment folder whose name contains both 'benchmark'
    % and the education label of this group.
    for i = 1:length(experimentFolders)
        if contains(experimentFolders(i).name,'benchmark') && contains(experimentFolders(i).name,edu)
            temp = importdata(fullfile(runDir,experimentFolders(i).name,'inputRevised.txt'));
            PR = temp(86:88)';
            PG = temp(89:91)';
            prior_prob = 1-temp(22);
            nbins = temp(8)+1;      % overrides the default set in the setup section
            clear temp
            break
        end
    end

    if isempty(PR) || isempty(PG)
        % if loading failed, take backup probabilities
        % (nbins keeps whatever value it had before this iteration)
        PR = [0.09590026372573  0.80000000000000  0.10409973627427];
        PG = [0.12323183888756  0.80000000000000  0.07676816111244];
        prior_prob = 0.75;
        warning('No Benchmark loaded. Using backup values:')
    else
        % Sanity checks on loaded inputs. The two checks are independent so
        % that an invalid prior is reported even when the probabilities
        % also fail to sum to one.
        if max(abs(sum(PR)-1),abs(sum(PG)-1)) > 1e-12
            warning('Transitory probabilities dont sum up to 1. Continuing...')
        end
        if prior_prob > 1 || prior_prob < 0
            warning('Prior is not in [0,1]. Continuing...')
        end
    end

    % binsize as in Model Solution (scores are measured on a 0-100 scale here)
    binsize = 100/(nbins-1);

    % Population sizes of behavioral (b) and rational (r) types in this group
    popsize{ee}.b = (1-prior_prob).*edu_size(ee);
    popsize{ee}.r = prior_prob.*edu_size(ee);


    %% Simulate Type Scores

    % aggregate (cumulative) probabilities, used to invert the uniform draws
    PPR = cumsum(PR);
    PPG = cumsum(PG);

    % draw random realizations; the same draws are used for both types
    omega_income=rand(periods,nindividual); 	%underlying randomness for income
    omega_score=rand(periods,nindividual);      %underlying randomness for score update

    % default to shock state 3; lower states are assigned below
    yrational = 3*ones(size(omega_income));
    ybehavioral = yrational;

    % generate transitory shock realizations: state ii is assigned where the
    % uniform draw lies below the cumulative probability of state ii; running
    % ii downwards lets the lowest applicable state win.
    for ii = 2:-1:1
        yrational(omega_income<PPR(ii)) = ii;
        ybehavioral(omega_income<PPG(ii)) = ii;
    end


    %initialize probabilities of rational scores: row 1 holds the prior,
    %rows 2..periods+1 are filled by the updating loop below
    sR = [ones(1,size(yrational,2))*prior_prob; zeros(size(yrational))];
    sB = sR;


    % Discrete (& probabilistic) Bayesian Updating, given transitory shock realizations.
    % The posterior (in percent) is placed on the score grid by randomized
    % rounding: it moves to the upper neighbouring bin with probability equal
    % to its relative distance from the lower bin, so the expected new score
    % equals the posterior.
    for ii=2:periods+1
        %Realist
        posteriorR = 100 .* PR(yrational(ii-1,:)) .* sR(ii-1,:) ./ ...
            ( PR(yrational(ii-1,:)) .* sR(ii-1,:) + PG(yrational(ii-1,:)) .* (1-sR(ii-1,:)) );
        isprimeR = floor(posteriorR./binsize) + 1;                  %calculate lower score bin
        lowbinprobR = (isprimeR.*binsize - posteriorR)./binsize;    %prob to stay in low bin (=distance)

        % (isprimeR<nbins) prevents stepping above the top bin
        sR(ii,:) = (isprimeR + (omega_score(ii-1,:)>lowbinprobR) .* (isprimeR<nbins) - 1).*binsize./100;   %calculate new score, increase score with prob = 1-lowbinprob

        %Behavioral (same updating rule, applied to the behavioral type's shocks)
        posteriorB = 100 .* PR(ybehavioral(ii-1,:)) .* sB(ii-1,:) ./ ...
            ( PR(ybehavioral(ii-1,:)) .* sB(ii-1,:) + PG(ybehavioral(ii-1,:)) .* (1-sB(ii-1,:)) );
        isprimeG = floor(posteriorB./binsize) + 1;                  %calculate lower score bin
        lowbinprobG = (isprimeG.*binsize - posteriorB)./binsize;    %prob to stay in low bin (=distance)

        sB(ii,:) = (isprimeG + (omega_score(ii-1,:)>lowbinprobG) .* (isprimeG<nbins) - 1).*binsize./100;   %increase score with prob = 1-lowbinprob
    end



    %% Calculate distribution

    % Discrete Type Score bins as in model solution.
    % The grid is built once, from the range of scores simulated for the
    % first education group, and reused for all later groups.
    if ee == 1
        centers = min(min(sR(:)),min(sB(:))):binsize./100:max(max(sR(:)),max(sB(:)));
        edges = [centers-binsize./100./2 centers(end)+binsize./100./2];
    elseif abs((edges(2)-edges(1)) - binsize./100) > 1e-9
        % nbins is reloaded per group, so the grid spacing can change; the
        % reused edges would then no longer line up with the score grid.
        warning('Type score grid differs across education groups; histogram edges of the first group are reused.')
    end

    % Count Type Scores of all ages for histograms
    counts{ee}.r = histcounts(sR(:),edges);
    counts{ee}.b = histcounts(sB(:),edges);

    % Calculate histograms (PDFs) for various ages; row plot_period(iage) of
    % sR / sB is used (row 1 being the prior)
    for iage = 1:length(plot_period)
        scoreHist{ee}.r(iage,:) = histcounts(sR(plot_period(iage),:),edges,'Normalization','probability');
        scoreHist{ee}.b(iage,:) = histcounts(sB(plot_period(iage),:),edges,'Normalization','probability');
    end

end

%% Figure 1: type score histograms by education group and plotted period

% The figure is created invisibly and only written to disk. Panels are laid
% out on a 2-by-3 grid: one row per education group, one column per entry
% of plot_period.
Figure1 = figure('Visible', 'off');
for ee = 1:numel(eduGroups)
    for aa = 1:numel(plot_period)
        subplot(2, 3, aa + (ee-1)*numel(plot_period))

        % Overlay the pre-computed histograms of both types on the shared bin edges
        histogram('BinEdges', edges, 'BinCounts', scoreHist{ee}.r(aa,:), 'Normalization', 'probability')
        hold on
        histogram('BinEdges', edges, 'BinCounts', scoreHist{ee}.b(aa,:), 'Normalization', 'probability')

        % The legend is shown in the first panel only
        if aa == 1 && ee == 1
            legend('Rational','Behavioral','Location','NW')
        end

        % Panel title: age implied by the plotted period, plus the group label
        title(sprintf('Age %s, %s', num2str(startage + periodlength*plot_period(aa)), eduGroups{ee}))
        xlabel('Type Score')
    end
end

% Write the figure to the output folder
saveas(Figure1, fullfile(outFolder,'Figure1.png'))

%% Table A6: cumulative type score distributions (in percent)

% Small helpers; the order of arithmetic operations matches the direct formulas.
pctCdf = @(c) cumsum(c)/sum(c).*100;    % within-group CDF in percent
cumW   = @(c,w) cumsum(c).*w;           % cumulative counts scaled by population size
totW   = @(c,w) sum(c).*w;              % total count scaled by population size

% Rational types pooled over both education groups
cdfAllRational = (cumW(counts{1}.r,popsize{1}.r) + cumW(counts{2}.r,popsize{2}.r)) ./ ...
    (totW(counts{1}.r,popsize{1}.r) + totW(counts{2}.r,popsize{2}.r)) .* 100;

% Behavioral types pooled over both education groups
cdfAllBehavioral = (cumW(counts{1}.b,popsize{1}.b) + cumW(counts{2}.b,popsize{2}.b)) ./ ...
    (totW(counts{1}.b,popsize{1}.b) + totW(counts{2}.b,popsize{2}.b)) .* 100;

% Everyone: both types and both education groups
cdfAll = (cumW(counts{1}.b,popsize{1}.b) + cumW(counts{2}.b,popsize{2}.b) ...
    + cumW(counts{1}.r,popsize{1}.r) + cumW(counts{2}.r,popsize{2}.r)) ./ ...
    (totW(counts{1}.b,popsize{1}.b) + totW(counts{2}.b,popsize{2}.b) ...
    + totW(counts{1}.r,popsize{1}.r) + totW(counts{2}.r,popsize{2}.r)) .* 100;

% One row per series, one column per type score bin
cdf_matrix = [ ...
    centers; ...                    Type Score Bins
    pctCdf(counts{1}.r); ...        Non-College Rational
    pctCdf(counts{1}.b); ...        Non-College Behavioral
    pctCdf(counts{2}.r); ...        College Rational
    pctCdf(counts{2}.b); ...        College Behavioral
    cdfAllRational; ...             Full Pop Rational
    cdfAllBehavioral; ...           Full Pop Behavioral
    cdfAll];                      % Full Pop


% Write Table A6 as a LaTeX table to TableA6.tex inside outFolder.
% The path is assembled with fullfile (as is done for Figure1.png), so the
% file lands inside outFolder whether or not outFolder ends with a file
% separator. A failed fopen is reported immediately with the system message
% instead of surfacing later as an invalid file identifier in fprintf.
[fileID, fopenMsg] = fopen(fullfile(outFolder,'TableA6.tex'),'w');
if fileID == -1
    error('TableA6:fopenFailed', 'Could not open %s for writing: %s', ...
        fullfile(outFolder,'TableA6.tex'), fopenMsg);
end

% Table preamble and column headers (8 columns: score + 7 CDF series)
fprintf(fileID,'%18s \r\n','\begin{table}[h]');
fprintf(fileID,'%18s \r\n','\centering');
fprintf(fileID,'%18s \r\n','\caption{Type-Score Distribution Across Types (CDF)}');
fprintf(fileID,'%18s \r\n','\begin{tabular}{lrrrrrrr}');
fprintf(fileID,'%18s \r\n','\toprule');
fprintf(fileID,'%5s &  %18s &  %18s  &  %18s    \\\\ \r\n','  ','\multicolumn{2}{c}{Non-College}','\multicolumn{2}{c}{College}','\multicolumn{3}{c}{Full Population}');
fprintf(fileID,'%5s &  %5s  &  %5s  & %5s  &  %5s  &  %5s  &  %5s  &  %5s \\\\ \r\n','Score', 'Realist', 'Behavioral','Realist', 'Behavioral','Realist', 'Behavioral','All Types');
fprintf(fileID,'%18s \r\n','\midrule');

% Table body: fprintf consumes cdf_matrix column by column and recycles the
% 8-field format, so every column (one score bin) becomes one table row.
fprintf(fileID,'%6.2f & %6.2f\\%% & %6.2f\\%% & %6.2f\\%% & %6.2f\\%%  & %6.2f\\%%  & %6.2f\\%% & %6.2f\\%%  \\\\ \r\n',cdf_matrix);

% Footer row with the population shares (in percent) of each group/type
fprintf(fileID,'%18s \r\n','\midrule');
fprintf(fileID,'%18s  \\\\ \r\n','\multicolumn{4}{l}{Overall Population Shares}');
fprintf(fileID,' & %6.2f\\%% & %6.2f\\%% & %6.2f\\%% & %6.2f\\%% & %6.2f\\%%  & %6.2f\\%%  \\\\ \r\n',[popsize{1}.r popsize{1}.b popsize{2}.r popsize{2}.b popsize{1}.r+popsize{2}.r  popsize{1}.b+popsize{2}.b  ]*100);
fprintf(fileID,'%18s \r\n','\bottomrule');
fprintf(fileID,'%18s \r\n','    \end{tabular}');
fprintf(fileID,'%18s \r\n','\end{table}');
fclose(fileID);




disp('Success!')







