Skip to content

Commit cdf8e7e

Browse files
Added new field client-secret for the Azure client secret ID. (#555)
* Added new field azure-client-secret for the Azure client secret ID.
* Updated test cases.
1 parent 67fd5d3 commit cdf8e7e

File tree

7 files changed

+221
-277
lines changed

7 files changed

+221
-277
lines changed

docs/gh_pages/docs/entityclassifier.md

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Below is the list of `entities` supported by Pebblo -
3131
1. RSA Private Key
3232
1. Google Account Private Key
3333
1. Github Fine Grained Token
34+
1. Azure Client Secret Key
3435

3536

3637
Users can get details of the classified entities for their loader source files in the Pebblo report.

pebblo/entity_classifier/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ And following Secret Entities:
2525
10. RSA Private Key
2626
11. Google Account Private Key
2727
12. Github Fine Grained Token
28+
13. Azure Client Secret Key
2829

2930
## How to use
3031
Entity Classifier

pebblo/entity_classifier/utils/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"aws-access-key": ["aws_access_key", "aws_key", "access", "id", "api"],
1212
"aws-secret-key": ["aws_secret_key", "secret"],
1313
"azure-key-id": ["azure_key", "azure_key_id", "azure_id", "key"],
14-
"azure-client-secret": ["azure_client_secret", "client", "secret"],
14+
"azure-client-secret": ["azure_client_secret", "client-secret", "client_secret"],
1515
"google-api-key": ["google_api_key", "google_key", "google"],
1616
}
1717

pebblo/entity_classifier/utils/regex_pattern.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@
1212
"aws-access-key": r"""\b((?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16})\b""",
1313
"aws-secret-key": r"""\b([A-Za-z0-9+/]{40})[ \r\n'"\x60]""",
1414
"azure-key-id": r"""(?i)(%s).{0,20}([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})""",
15-
"azure-client-secret": r"""\b(?i)(%s).{0,20}([a-z0-9_\.\-~]{34})\b""",
15+
"azure-client-secret": r"""\b[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\b""",
1616
"google-api-key": r"""\bAIza[0-9A-Za-z\-_]{35}\b""",
1717
}
+17-18
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,27 @@
11
mock_input_text1_anonymize_snippet_true = """
2-
<PERSON>'s SSN is <US_SSN>.
2+
Sachin's SSN is <US_SSN>.
33
ITIN number <US_ITIN>
44
His AWS Access Key is: <AWS_ACCESS_KEY>.
5-
And <PERSON> is: <GITHUB_TOKEN>
5+
And Github Token is: <GITHUB_TOKEN>
66
"""
77

88
mock_input_text2_anonymize_snippet_true = """
99
Content
10-
"<PERSON> board on <DATE_TIME> announced an interim dividend of Re 1 per equity share of the face value of Rs 2 each, i.e., a 50 per cent payout for <DATE_TIME> along with financial results for the <DATE_TIME> period of the company for <DATE_TIME>."
11-
"<PERSON> reminded the board of the scheduled retreat coming up in <DATE_TIME>, and provided a drafted retreat schedule. The board provided feedback on the agenda and the consensus was that, outside of making a few minor changes, the committee should move forward as planned. No board action required."
10+
"Wipros board on Friday, January 12 announced an interim dividend of Re 1 per equity share of the face value of Rs 2 each, i.e., a 50 per cent payout for the current financial year along with financial results for the October-December period of the company for the financial year ending March 2024."
11+
"Roberts reminded the board of the scheduled retreat coming up in three months, and provided a drafted retreat schedule. The board provided feedback on the agenda and the consensus was that, outside of making a few minor changes, the committee should move forward as planned. No board action required."
1212
"Claims: An adaptive pacing system for implantable cardiac devices, comprising a pulse generator, multiple sensing electrodes, a microprocessor-based control unit, a wireless communication module, and memory for dynamically adjusting pacing parameters based on real-time physiological data. The system of claim 1, wherein the adaptive pacing algorithms include rate-responsive pacing based on physical activity. The system of claim 1, further comprising an external monitoring system for remote data access and modification of pacing parameters."
13-
"<PERSON>'s SSN is <US_SSN>. His passport ID is 5484880UA.
14-
<PERSON>'s driver's license number is <NRP>.
15-
<PERSON>'s bank account number is 70048841700216300.
16-
His <NRP> express credit card number is <CREDIT_CARD>.
17-
His UK IBAN Code is <IBAN_CODE>.
18-
ITIN number <US_ITIN>.
19-
Azure client secret : c4cb6f91-15a7-4e6d-a824-abcdef012345.
20-
AWS Access Key is: <AWS_ACCESS_KEY>
21-
AWS Secret Key is : <AWS_SECRET_KEY>
22-
Github Token is: <GITHUB_TOKEN>
23-
Google API key: <PERSON><PERSON> is: <SLACK_TOKEN>
24-
Azure Client Secret - c4cb6f91-15a7-4e6d-a824-abcdef012345
25-
<PERSON> - <SLACK_TOKEN>
13+
"Sachin's SSN is <US_SSN>. His passport ID is 5484880UA.
14+
Sachin's driver's license number is <US_DRIVER_LICENSE>.
15+
Sachin's bank account number is <US_BANK_NUMBER>.
16+
His American express credit card number is <CREDIT_CARD>.
17+
His UK IBAN Code is <IBAN_CODE>.
18+
ITIN number <US_ITIN>.
19+
AWS Access Key is: <AWS_ACCESS_KEY>
20+
AWS Secret Key is : <AWS_SECRET_KEY>Github Token is: <GITHUB_TOKEN>
21+
Google API key: zaCELgL0imfnc8mVLWwsAawjYr4Rx-Af50DDqtlx
22+
Slack Token is: <SLACK_TOKEN>
23+
Slack Token - <SLACK_TOKEN>
2624
Google API key- KLzaSyB_tWrbmfWx8g2bzL7Vhq7znuTUn0JPKmY"
27-
IP Address - <IP_ADDRESS>
25+
My IP Address - <IP_ADDRESS>
26+
Azure client_secret is <AZURE_CLIENT_SECRET>
2827
"""

tests/entity_classifier/test_data.py

+40-15
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,51 @@
1010
"Wipros board on Friday, January 12 announced an interim dividend of Re 1 per equity share of the face value of Rs 2 each, i.e., a 50 per cent payout for the current financial year along with financial results for the October-December period of the company for the financial year ending March 2024."
1111
"Roberts reminded the board of the scheduled retreat coming up in three months, and provided a drafted retreat schedule. The board provided feedback on the agenda and the consensus was that, outside of making a few minor changes, the committee should move forward as planned. No board action required."
1212
"Claims: An adaptive pacing system for implantable cardiac devices, comprising a pulse generator, multiple sensing electrodes, a microprocessor-based control unit, a wireless communication module, and memory for dynamically adjusting pacing parameters based on real-time physiological data. The system of claim 1, wherein the adaptive pacing algorithms include rate-responsive pacing based on physical activity. The system of claim 1, further comprising an external monitoring system for remote data access and modification of pacing parameters."
13-
"Sachin's SSN is 222-85-4836. His passport ID is 5484880UA.
14-
Sachin's driver's license number is S9998888.
15-
Sachin's bank account number is 70048841700216300.
16-
His American express credit card number is 371449635398431.
17-
His UK IBAN Code is AZ96AZEJ00000000001234567890.
18-
ITIN number 993-77 0690.
19-
Azure client secret : c4cb6f91-15a7-4e6d-a824-abcdef012345.
20-
AWS Access Key is: AKIAQIPT4PDORIRTV6PH
21-
AWS Secret Key is : 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
22-
Github Token is: ghp_hgu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf
23-
Google API key: zaCELgL0imfnc8mVLWwsAawjYr4Rx-Af50DDqtlx
24-
Slack Token is: xoxp-7676545380258-uygh
25-
Azure Client Secret - c4cb6f91-15a7-4e6d-a824-abcdef012345
26-
Slack Token - xoxb-3204014939555-4519358291237-TTIf0243T8YFSAGEVr1wBrWE
13+
"Sachin's SSN is 222-85-4836. His passport ID is 5484880UA.
14+
Sachin's driver's license number is S9998888.
15+
Sachin's bank account number is 70048841700216300.
16+
His American express credit card number is 371449635398431.
17+
His UK IBAN Code is AZ96AZEJ00000000001234567890.
18+
ITIN number 993-77 0690.
19+
AWS Access Key is: AKIAQIPT4PDORIRTV6PH
20+
AWS Secret Key is : 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
21+
Github Token is: ghp_hgu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf
22+
Google API key: zaCELgL0imfnc8mVLWwsAawjYr4Rx-Af50DDqtlx
23+
Slack Token is: xoxp-7676545380258-uygh
24+
Slack Token - xoxb-3204014939555-4519358291237-TTIf0243T8YFSAGEVr1wBrWE
2725
Google API key- KLzaSyB_tWrbmfWx8g2bzL7Vhq7znuTUn0JPKmY"
28-
My IP Address - 10.55.60.61
26+
My IP Address - 10.55.60.61
27+
Azure client_secret is de1d4a2d-d9fa-44f1-84bb-4f73c004afda
2928
"""
3029

3130
negative_data = """
3231
Sachin's SSN is 222-85.
3332
His AWS Access Key is: AKIPT4PDORIRTV6PH.
3433
And Github Token is: ghpu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf
3534
"""
35+
36+
tf_test_data = """
37+
variable "client_secret" {
38+
}
39+
40+
# We strongly recommend using the required_providers block to set the
41+
# Azure Provider source and version being used
42+
terraform {
43+
required_providers {
44+
azurerm = {
45+
source = "hashicorp/azurerm"
46+
version = "~> 4.x"
47+
}
48+
}
49+
}
50+
51+
# Configure the Microsoft Azure Provider
52+
provider "azurerm" {
53+
features {}
54+
55+
client_id = "00000000-0000-0000-0000-000000000000"
56+
client_secret = "1131a1fc-8cee-4f3c-9b2f-6808f66f72a4"
57+
tenant_id = "10000000-0000-0000-0000-000000000000"
58+
subscription_id = "20000000-0000-0000-0000-000000000000"
59+
}
60+
"""

0 commit comments

Comments
 (0)