Open In App

Minimum size of subset of Strings with frequency more than half of Array

Last Updated : 12 May, 2024
Improve
Improve
Like Article
Like
Save
Share
Report

Given an Array of Strings (Arr), the task is to find the smallest subset of strings in the array such that the total count of those selected strings exceeds 50% of the size of the original array. In other words, find the minimum set of distinct strings that constitutes over 50% of the array’s elements.

Examples:

Input: Arr = [‘shoes’, ‘face’, ‘pizza’, ‘covid’, ‘shoes’, ‘covid’, ‘covid’, ‘face’, ‘shoes’]
Output: [‘covid’, ‘shoes’]
Explanation: Frequency of the strings is as follows: ‘shoes’ : 3, ‘covid’ : 3, ‘face’ : 2, ‘pizza’ : 1
So ‘shoes’ (3) + ‘covid’ (3) = 6, which is greater than half the size of the array (9 / 2 = 4.5).

Input: Arr = [‘java’, ‘python’, ‘java’, ‘python’, ‘python’]
Output: [‘python’]
Explanation: Frequency of the strings is as follows: ‘python’ : 3, ‘java’ : 2.
So ‘python’ (3) alone is greater than half the size of the array (5 / 2 = 2.5).

Source: MindTickle Off-Campus Full Time Interview Experience

Approach #1 :

Iterate through the array and build a dictionary of frequencies: add each newly seen element as a key, or, if the element has already occurred, increase its value by 1. Then sort the dictionary entries in decreasing order of frequency and take entries from the front until their combined frequency exceeds half of the array size; the strings taken form the required subset.

Below is the implementation of the above approach:

C++
#include <iostream>
#include <vector>
#include <unordered_map>
#include <algorithm>

using namespace std;

// Picks distinct strings, most frequent first, until their combined
// number of occurrences in arr[0..n) is more than half of n, and
// returns the strings picked.
vector<string> minSubsetToExceedHalf(string arr[], int n)
{
    // Tally the occurrences of every distinct string
    unordered_map<string, int> counts;
    int idx = 0;
    while (idx < n) {
        counts[arr[idx]] += 1;
        ++idx;
    }

    // Smallest occurrence total that is strictly greater than n / 2
    const int needed = (n / 2) + 1;

    // Rank the (string, count) pairs, highest count first
    vector<pair<string, int>> ranked(counts.begin(), counts.end());
    sort(ranked.begin(), ranked.end(),
         [](const pair<string, int>& lhs, const pair<string, int>& rhs) {
             return rhs.second < lhs.second;
         });

    // Take strings from the front of the ranking, stopping as soon
    // as the running total reaches the required amount
    vector<string> chosen;
    int covered = 0;
    for (size_t pos = 0; pos < ranked.size(); ++pos) {
        chosen.push_back(ranked[pos].first);
        covered += ranked[pos].second;
        if (covered >= needed) {
            break;
        }
    }

    return chosen;
}

// Driver Code
int main()
{
    // Sample input from the problem statement
    string words[] = { "shoes", "face",  "pizza", "covid", "shoes",
                       "covid", "covid", "face",  "shoes" };
    const int count = sizeof(words) / sizeof(words[0]);

    // Compute the subset and print it, one string followed by a space each
    const vector<string> picked = minSubsetToExceedHalf(words, count);
    for (size_t i = 0; i < picked.size(); ++i) {
        cout << picked[i] << " ";
    }
    cout << endl;

    return 0;
}
Java
import java.util.*;

public class Main {
    /**
     * Selects distinct strings, most frequent first, until their combined
     * number of occurrences is more than half of {@code arr.length}.
     *
     * <p>Strings with equal counts keep the iteration order of the
     * underlying {@link HashMap}, so their relative order in the result
     * is not specified.
     *
     * @param arr the input strings
     * @return the selected strings in the order they were picked; empty
     *         when {@code arr} is empty
     */
    static List<String> minSubsetToExceedHalf(String[] arr)
    {
        // Smallest occurrence total that is strictly greater than
        // half of the array length
        int maxFreq = (arr.length / 2) + 1;

        // Count the frequency of each string in the array
        Map<String, Integer> frequency = new HashMap<>();
        for (String string : arr) {
            frequency.merge(string, 1, Integer::sum);
        }

        // Order the entries by frequency, highest first.
        // Integer.compare is used instead of subtracting the
        // values, which is the overflow-safe comparator form.
        List<Map.Entry<String, Integer> > sortedFrequency
            = new ArrayList<>(frequency.entrySet());
        sortedFrequency.sort(
            (a, b) -> Integer.compare(b.getValue(), a.getValue()));

        // Take strings from the front until the running total
        // reaches the threshold
        List<String> maxFreqStrings = new ArrayList<>();
        int currFreq = 0;
        for (Map.Entry<String, Integer> entry :
             sortedFrequency) {
            maxFreqStrings.add(entry.getKey());
            currFreq += entry.getValue();

            if (currFreq >= maxFreq) {
                break;
            }
        }

        return maxFreqStrings;
    }

    // Driver Code
    public static void main(String[] args)
    {
        String[] arr
            = { "shoes", "face",  "pizza", "covid", "shoes",
                "covid", "covid", "face",  "shoes" };

        // Calling and printing the result
        System.out.println(
            String.join(" ", minSubsetToExceedHalf(arr)));
    }
}
Python
def min_subset_to_exceed_half(arr):
    """Select the most frequent strings until they cover over half of arr.

    Strings are taken in decreasing order of frequency until their combined
    number of occurrences is strictly greater than len(arr) / 2.

    Args:
        arr: list of strings to examine.

    Returns:
        List of the selected strings, most frequent first. Strings with
        equal frequency keep the order in which they first appear in arr.
        Empty when arr is empty.
    """
    # Count the frequency of each string in the array
    frequency = {}
    for string in arr:
        frequency[string] = frequency.get(string, 0) + 1

    # Smallest occurrence total that is strictly greater than half of the array
    max_freq = (len(arr) // 2) + 1

    # (string, count) pairs ordered by count, highest first; sorted() is
    # stable, so ties stay in first-seen order
    sorted_frequency = sorted(
        frequency.items(), key=lambda item: item[1], reverse=True)

    max_freq_strings = []
    curr_freq = 0

    # Take strings from the front until the running total reaches the threshold
    for string, count in sorted_frequency:
        max_freq_strings.append(string)
        curr_freq += count

        if curr_freq >= max_freq:
            break

    return max_freq_strings


# Driver Code
arr = ["shoes", "face", "pizza", "covid", "shoes",
       "covid", "covid", "face", "shoes"]
# Calling and printing the result
print(*min_subset_to_exceed_half(arr))  # Output: shoes covid
Javascript
// JavaScript code for the above approach:
/**
 * Picks distinct strings, most frequent first, until their combined
 * number of occurrences is more than half of arr.length.
 *
 * @param {string[]} arr - input strings
 * @returns {string[]} the strings picked, in the order they were taken
 */
function minSubsetToExceedHalf(arr) {
    // Tally how many times each string occurs
    const counts = new Map();
    for (const word of arr) {
        const seen = counts.has(word) ? counts.get(word) : 0;
        counts.set(word, seen + 1);
    }

    // Smallest occurrence total that is strictly greater than half the length
    const needed = Math.floor(arr.length / 2) + 1;

    // [string, count] pairs ordered by count, highest first
    const ranked = Array.from(counts.entries());
    ranked.sort((left, right) => right[1] - left[1]);

    // Take strings from the front until the running total reaches the target
    const picked = [];
    let covered = 0;
    for (let i = 0; i < ranked.length; i += 1) {
        const [word, count] = ranked[i];
        picked.push(word);
        covered += count;
        if (covered >= needed) {
            break;
        }
    }

    return picked;
}

// Driver Code
const arr = [
    "shoes", "face", "pizza", "covid", "shoes",
    "covid", "covid", "face", "shoes",
];

// Compute the subset, then print it as one space-separated line
const selectedStrings = minSubsetToExceedHalf(arr);
console.log(selectedStrings.join(' '));

Output
covid shoes

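Time Complexity: O(M + N Log N), where M is the length of the input array and N is the number of unique strings (O(M) to count the frequencies, O(N Log N) to sort them),
Auxiliary Space: O(N), where N represents the number of unique strings in the input array.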

Approach #2: Using collections.Counter():

The recommended way to count occurrences is a ready-made frequency counter such as Python's collections.Counter, which computes the frequency of every element in one step and can also be used to look up the frequency of a single element if required.

Below is the implementation of the above approach:

C++
#include <algorithm>
#include <iostream>
#include <unordered_map>
#include <vector>

using namespace std;

// Picks distinct strings, most frequent first, until their combined
// number of occurrences in arr is more than half of arr.size(), and
// returns the strings picked.
vector<string> GFG(vector<string>& arr)
{
    // Tally the occurrences of every distinct string
    unordered_map<string, int> tally;
    for (auto it = arr.begin(); it != arr.end(); ++it) {
        ++tally[*it];
    }

    // Smallest occurrence total that is strictly greater than
    // half of the array size
    const int target = arr.size() / 2 + 1;

    // Rank the (string, count) pairs, highest count first
    vector<pair<string, int> > byCount(tally.begin(),
                                       tally.end());
    sort(byCount.begin(), byCount.end(),
         [](const auto& lhs, const auto& rhs) {
             return rhs.second < lhs.second;
         });

    // Take strings from the front of the ranking, stopping as
    // soon as the running total reaches the target
    vector<string> picked;
    int running = 0;
    for (auto it = byCount.begin(); it != byCount.end(); ++it) {
        picked.push_back(it->first);
        running += it->second;
        if (!(running < target)) {
            break;
        }
    }

    return picked;
}

int main()
{
    // Sample input from the problem statement
    vector<string> arr{ "shoes", "face",  "pizza", "covid", "shoes",
                        "covid", "covid", "face",  "shoes" };

    // Print every selected string followed by a space
    for (const string& picked : GFG(arr)) {
        cout << picked << " ";
    }
    cout << endl;

    return 0;
}
Java
import java.util.*;

public class Main {
    /**
     * Selects distinct strings, most frequent first, until their combined
     * number of occurrences is more than half of {@code arr.length}.
     *
     * <p>Strings with equal counts keep the iteration order of the
     * underlying {@link HashMap}, so their relative order in the result
     * is not specified.
     *
     * @param arr the input strings
     * @return the selected strings in the order they were picked; empty
     *         when {@code arr} is empty
     */
    public static List<String> GFG(String[] arr)
    {
        // Count the frequency of each string in the array
        Map<String, Integer> frequency = new HashMap<>();
        for (String string : arr) {
            frequency.merge(string, 1, Integer::sum);
        }
        // Smallest occurrence total that is strictly greater
        // than half of the array length
        int maxFreq = arr.length / 2 + 1;
        // Order the entries by frequency, highest first.
        // Integer.compare is used instead of subtracting the
        // values, which is the overflow-safe comparator form.
        List<Map.Entry<String, Integer> > sortedFrequency
            = new ArrayList<>(frequency.entrySet());
        sortedFrequency.sort(
            (a, b) -> Integer.compare(b.getValue(), a.getValue()));
        // Take strings from the front until the running total
        // reaches the threshold
        List<String> maxFreqStrings = new ArrayList<>();
        int currFreq = 0;
        for (Map.Entry<String, Integer> entry :
             sortedFrequency) {
            maxFreqStrings.add(entry.getKey());
            currFreq += entry.getValue();
            if (currFreq >= maxFreq) {
                break;
            }
        }
        return maxFreqStrings;
    }

    public static void main(String[] args)
    {
        // Input array
        String[] arr
            = { "shoes", "face",  "pizza", "covid", "shoes",
                "covid", "covid", "face",  "shoes" };
        List<String> result = GFG(arr);
        // Each string is printed followed by a single space
        for (String s : result) {
            System.out.print(s + " ");
        }
    }
}
Python
from collections import Counter


def min_subset_to_exceed_half(arr):
    """Select the most frequent strings until they cover over half of arr.

    Strings are taken in decreasing order of frequency until their combined
    number of occurrences is strictly greater than len(arr) / 2.

    Args:
        arr: list of strings to examine.

    Returns:
        List of the selected strings, most frequent first. Strings with
        equal frequency keep the order in which they first appear in arr.
        Empty when arr is empty.
    """
    # Count the frequency of each string in the array using Counter
    frequency = Counter(arr)

    # Smallest occurrence total that is strictly greater than half of the array
    max_freq = (len(arr) // 2) + 1

    max_freq_strings = []
    curr_freq = 0

    # most_common() yields (string, count) pairs from the highest count down,
    # with equal counts in first-seen order; take them until the running
    # total reaches the threshold
    for string, count in frequency.most_common():
        max_freq_strings.append(string)
        curr_freq += count

        if curr_freq >= max_freq:
            break

    return max_freq_strings


# Driver Code
arr = ["shoes", "face", "pizza", "covid", "shoes",
       "covid", "covid", "face", "shoes"]
# Calling and printing the result
print(*min_subset_to_exceed_half(arr))  # Output: shoes covid
JavaScript
/**
 * Picks distinct strings, most frequent first, until their combined
 * number of occurrences is more than half of arr.length.
 *
 * @param {string[]} arr - input strings
 * @returns {string[]} the strings picked, in the order they were taken
 */
function GFG(arr) {
    // Tally how many times each string occurs, keyed on a plain object
    const tally = {};
    for (const word of arr) {
        const seen = tally[word] || 0;
        tally[word] = seen + 1;
    }

    // Smallest occurrence total that is strictly greater than half the length
    const needed = Math.floor(arr.length / 2) + 1;

    // [string, count] pairs ordered by count, highest first
    const ranked = Object.entries(tally);
    ranked.sort((left, right) => right[1] - left[1]);

    // some() stops at the first callback that returns true, i.e. as soon
    // as the running total reaches the target
    const picked = [];
    let covered = 0;
    ranked.some(([word, count]) => {
        picked.push(word);
        covered += count;
        return covered >= needed;
    });

    return picked;
}
// Sample input from the problem statement
const arr = [
    "shoes", "face", "pizza", "covid", "shoes",
    "covid", "covid", "face", "shoes",
];
// Each selected string is passed to console.log as its own argument
const selectedStrings = GFG(arr);
console.log(...selectedStrings);

Output
covid shoes 

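Time Complexity: O(M + N Log N), where M is the length of the input array and N is the number of unique strings (O(M) to count the frequencies, O(N Log N) to sort them),
Auxiliary Space: O(N), where N represents the number of unique strings in the input array.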



Like Article
Suggest improvement
Share your thoughts in the comments

Similar Reads