Find top k (or most frequent) numbers in a stream
Last Updated :
03 Jan, 2023
Given an array of n numbers, read the numbers one at a time and, after each one is read, keep at most k numbers at the top, ordered by decreasing frequency (numbers with equal frequency are ordered by increasing value). After every read, print the top k numbers sorted by frequency once the stream has included at least k distinct elements; until then, print all distinct elements seen so far, sorted by frequency.
Examples:
Input : arr[] = {5, 2, 1, 3, 2}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 2 1 3 5
Explanation:
- After reading 5, there is only one element 5 whose frequency is max till now.
so print 5.
- After reading 2, we will have two elements 2 and 5 with the same frequency.
Since their frequencies are equal and 2 is smaller than 5, we print 2 5.
- After reading 1, we will have 3 elements 1, 2 and 5 with the same frequency,
so print 1 2 5.
- Similarly after reading 3, print 1 2 3 5
- After reading the last element 2, which has already occurred, the frequency
of 2 becomes 2. So we keep 2 at the top, followed by the rest of the elements,
which all share the same frequency, in sorted order. So print 2 1 3 5.
Input : arr[] = {5, 2, 1, 3, 4}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 1 2 3 4
Explanation:
- After reading 5, there is only one element 5 whose frequency is max till now.
so print 5.
- After reading 2, we will have two elements 2 and 5 with the same frequency.
Since their frequencies are equal and 2 is smaller than 5, we print 2 5.
- After reading 1, we will have 3 elements 1, 2 and 5 with the same frequency,
so print 1 2 5.
Similarly after reading 3, print 1 2 3 5
- After reading the last element 4, all five elements have the same frequency, so only the k = 4 smallest are kept and 5 is dropped.
So print 1 2 3 4.
Approach: The idea is to store the top k elements with maximum frequency. A vector or an array can be used to store them. To keep track of the frequencies of the elements, create a HashMap that stores element-frequency pairs. Given a stream of numbers, when a new element appears in the stream, update the frequency of that element in the HashMap and put that element at the end of the list of k numbers (k+1 elements in total). Then compare adjacent elements of the list, moving from the new element towards the front, and swap whenever the higher-frequency element is stored after the lower-frequency one.
Algorithm:
- Create a Hashmap hm, and an array of k + 1 length.
- Traverse the input array from start to end.
- Insert the element at k+1 th position of the array, and update the frequency of that element in HashMap.
- Now, iterate from the position of element to zero.
- For every element, compare its frequency with that of the element after it and swap if the higher-frequency element is stored after it; if the frequencies are the same, swap when the earlier element is greater than the later one.
- print the top k element in each traversal of the original array.
Implementation:
C++
#include <bits/stdc++.h>
using namespace std;
/**
 * Reads the stream a[0..n) one element at a time and, after every element,
 * prints the (at most) k top-ranked distinct values seen so far.
 *
 * Ranking: higher frequency first; values with equal frequency are ordered
 * by increasing value. A value that has been pushed out of the list keeps
 * its count and can re-enter the list the next time it appears in the stream.
 *
 * Output format: every printed value is followed by a single space; all
 * steps are written on one line, terminated by one newline at the end.
 *
 * @param a  input stream (only a[0..n) is read)
 * @param n  number of elements in the stream
 * @param k  maximum number of values to keep; k <= 0 keeps nothing, so only
 *           the terminating newline is printed
 */
void kTop(int a[], int n, int k)
{
    const std::size_t capacity = k > 0 ? static_cast<std::size_t>(k) : 0;

    // Ranked candidates, best first. Only values that were really read are
    // stored here, so no in-band "empty slot" marker is needed and a 0 in
    // the stream is handled like any other value.
    std::vector<int> top;
    top.reserve(capacity + 1);

    std::unordered_map<int, int> freq;

    for (int m = 0; m < n; ++m) {
        const int value = a[m];
        ++freq[value];

        // Locate the value in the ranked list, appending it as the
        // provisional (capacity + 1)-th entry when it is not there yet.
        auto it = std::find(top.begin(), top.end(), value);
        if (it == top.end()) {
            top.push_back(value);
            it = top.end() - 1;
        }

        // Bubble the value towards the front while it outranks its
        // predecessor. Only this value's count changed, so the walk can
        // stop at the first predecessor that is still ranked higher.
        for (; it != top.begin(); --it) {
            const auto prev = it - 1;
            const int prevCount = freq[*prev];
            const int curCount = freq[*it];
            const bool outranks = prevCount < curCount
                                  || (prevCount == curCount && *prev > *it);
            if (!outranks)
                break;
            std::iter_swap(prev, it);
        }

        // Drop the overflow entry so at most `capacity` values are kept.
        if (top.size() > capacity)
            top.pop_back();

        for (const int v : top)
            std::cout << v << ' ';
    }
    std::cout << '\n';
}
// Driver: runs kTop on the first worked example (stream 5 2 1 3 2, k = 4).
int main()
{
    int arr[] = { 5, 2, 1, 3, 2 };
    const int n = sizeof arr / sizeof *arr; // element count of the local array
    const int k = 4;

    kTop(arr, n, k);
    return 0;
}
|
Java
import java.io.*;
import java.util.*;
class GFG {
static int find( int [] arr, int ele)
{
for ( int i = 0 ; i < arr.length; i++)
if (arr[i] == ele)
return i;
return - 1 ;
}
static void kTop( int [] a, int n, int k)
{
int [] top = new int [k + 1 ];
HashMap<Integer, Integer> freq = new HashMap<>();
for ( int i = 0 ; i < k + 1 ; i++)
freq.put(i, 0 );
for ( int m = 0 ; m < n; m++) {
if (freq.containsKey(a[m]))
freq.put(a[m], freq.get(a[m]) + 1 );
else
freq.put(a[m], 1 );
top[k] = a[m];
int i = find(top, a[m]);
i -= 1 ;
while (i >= 0 ) {
if (freq.get(top[i]) < freq.get(top[i + 1 ])) {
int temp = top[i];
top[i] = top[i + 1 ];
top[i + 1 ] = temp;
}
else if ((freq.get(top[i]) == freq.get(top[i + 1 ])) && (top[i] > top[i + 1 ])) {
int temp = top[i];
top[i] = top[i + 1 ];
top[i + 1 ] = temp;
}
else
break ;
i -= 1 ;
}
for ( int j = 0 ; j < k && top[j] != 0 ; ++j)
System.out.print(top[j] + " " );
}
System.out.println();
}
public static void main(String args[])
{
int k = 4 ;
int [] arr = { 5 , 2 , 1 , 3 , 2 };
int n = arr.length;
kTop(arr, n, k);
}
}
|
Python3
def kTop(a, n, k):
    """Print the top-k most frequent values after each element of a stream.

    Reads a[0:n] one element at a time. After every element, prints the
    (at most) k top-ranked distinct values seen so far: higher frequency
    first, ties ordered by increasing value. Every value is followed by a
    single space and one newline is printed after the whole stream.

    Args:
        a: sequence holding the input stream.
        n: number of elements of ``a`` to read.
        k: maximum number of values to keep; k <= 0 keeps nothing.
    """
    capacity = max(k, 0)

    # Ranked candidates, best first. Only values that were really read are
    # stored, so a 0 in the stream is handled like any other value.
    top = []
    freq = {}

    for m in range(n):
        value = a[m]
        freq[value] = freq.get(value, 0) + 1

        # Locate the value, appending it as the provisional last entry
        # when it is not in the ranked list yet.
        if value in top:
            pos = top.index(value)
        else:
            top.append(value)
            pos = len(top) - 1

        # Bubble the value towards the front while it outranks its
        # predecessor; only its own count changed, so stop at the first
        # predecessor that still ranks higher.
        while pos > 0:
            prev, cur = top[pos - 1], top[pos]
            if freq[prev] < freq[cur] or (freq[prev] == freq[cur] and prev > cur):
                top[pos - 1], top[pos] = cur, prev
                pos -= 1
            else:
                break

        # Drop the overflow entry so at most `capacity` values are kept.
        del top[capacity:]

        for item in top:
            print(item, end=" ")
    print()
# Driver: run kTop on the first worked example (stream 5 2 1 3 2, k = 4).
arr = [5, 2, 1, 3, 2]
k = 4
n = len(arr)
kTop(arr, n, k)
|
C#
using System;
using System.Collections.Generic;
class GFG {
    /// <summary>
    /// Returns the index of the first occurrence of ele in arr,
    /// or -1 when arr does not contain it.
    /// </summary>
    static int find(int[] arr, int ele)
    {
        return IndexOf(arr, arr.Length, ele);
    }

    /// <summary>Linear search restricted to the first len entries of arr.</summary>
    private static int IndexOf(int[] arr, int len, int ele)
    {
        for (int i = 0; i < len; i++)
            if (arr[i] == ele)
                return i;
        return -1;
    }

    /// <summary>
    /// Reads a[0..n) one element at a time and, after every element, prints
    /// the (at most) k top-ranked distinct values seen so far: higher
    /// frequency first, ties ordered by increasing value. Each value is
    /// followed by a space; one line break is written after the whole stream.
    /// </summary>
    /// <param name="a">Input stream.</param>
    /// <param name="n">Number of elements of a to read.</param>
    /// <param name="k">Maximum number of values to keep; k &lt;= 0 keeps nothing.</param>
    static void kTop(int[] a, int n, int k)
    {
        int capacity = Math.Max(k, 0);

        // top[0..size) is the ranked list; the extra slot temporarily holds
        // a newly seen value. Tracking size explicitly (instead of treating
        // 0 as "empty") lets the stream contain the value 0.
        int[] top = new int[capacity + 1];
        int size = 0;

        Dictionary<int, int> freq = new Dictionary<int, int>();

        for (int m = 0; m < n; m++) {
            int value = a[m];
            int seen;
            freq.TryGetValue(value, out seen); // seen stays 0 for a new value
            freq[value] = seen + 1;

            int pos = IndexOf(top, size, value);
            if (pos < 0) {
                pos = size;
                top[size++] = value;
            }

            // Bubble the value towards the front while it outranks its
            // predecessor; only this value's count changed, so the walk
            // stops at the first predecessor that still ranks higher.
            while (pos > 0) {
                int prevCount = freq[top[pos - 1]];
                int curCount = freq[top[pos]];
                bool outranks = prevCount < curCount
                    || (prevCount == curCount && top[pos - 1] > top[pos]);
                if (!outranks)
                    break;
                int temp = top[pos - 1];
                top[pos - 1] = top[pos];
                top[pos] = temp;
                pos--;
            }

            // Discard the overflow entry so at most capacity values remain.
            if (size > capacity)
                size = capacity;

            for (int j = 0; j < size; j++)
                Console.Write(top[j] + " ");
        }
        Console.WriteLine();
    }

    // Driver: runs the first worked example (k = 4).
    public static void Main(String[] args)
    {
        int k = 4;
        int[] arr = { 5, 2, 1, 3, 2 };
        int n = arr.Length;
        kTop(arr, n, k);
    }
}
|
Javascript
<script>
/**
 * Linear search using strict equality.
 * Returns the index of the first entry of arr that is === ele, or -1 when
 * there is no such entry.
 */
function find(arr, ele) {
  var idx = 0;
  while (idx < arr.length) {
    if (arr[idx] === ele) {
      return idx;
    }
    idx += 1;
  }
  return -1;
}
/**
 * Reads a[0..n) one element at a time and, after every element, writes the
 * (at most) k top-ranked distinct values seen so far to the document:
 * higher frequency first, ties ordered by increasing value. Each value is
 * followed by a space; a single "<br>" is written after the whole stream.
 *
 * @param {number[]} a input stream
 * @param {number} n number of elements of a to read
 * @param {number} k maximum number of values to keep; k <= 0 keeps nothing
 */
function kTop(a, n, k) {
  var capacity = Math.max(k, 0);

  // Ranked candidates, best first. Only values that were really read are
  // stored, so a 0 in the stream is handled like any other value.
  var top = [];
  var freq = new Map();

  for (var m = 0; m < n; m++) {
    var value = a[m];
    freq.set(value, (freq.get(value) || 0) + 1);

    // Locate the value, appending it as the provisional last entry when it
    // is not in the ranked list yet.
    var pos = top.indexOf(value);
    if (pos === -1) {
      top.push(value);
      pos = top.length - 1;
    }

    // Bubble the value towards the front while it outranks its predecessor;
    // only its own count changed, so stop at the first predecessor that
    // still ranks higher.
    while (pos > 0) {
      var prevCount = freq.get(top[pos - 1]);
      var curCount = freq.get(top[pos]);
      var outranks =
        prevCount < curCount ||
        (prevCount === curCount && top[pos - 1] > top[pos]);
      if (!outranks) break;
      var temp = top[pos - 1];
      top[pos - 1] = top[pos];
      top[pos] = temp;
      pos--;
    }

    // Drop the overflow entry so at most `capacity` values are kept.
    if (top.length > capacity) top.pop();

    for (var j = 0; j < top.length; ++j) document.write(top[j] + " ");
  }
  document.write("<br>");
}
// Driver: run kTop on the first worked example (stream 5 2 1 3 2, k = 4).
var arr = [5, 2, 1, 3, 2];
var k = 4;
var n = arr.length;
kTop(arr, n, k);
</script>
|
Output:
5 2 5 1 2 5 1 2 3 5 2 1 3 5
Complexity Analysis:
- Time Complexity: O( n * k ).
In each traversal the temp array of size k is traversed, So the time Complexity is O( n * k ).
- Space Complexity: O(n).
To store the elements in HashMap O(n) space is required.
Like Article
Suggest improvement
Share your thoughts in the comments
Please Login to comment...