Skip to content

Commit 7f128bd

Browse files
committed
OrcLib: FileFind: add Yara cache on last item
The Yara rules are all evaluated for each 'ntfs_find' with a 'yara_rule' attribute. Even if only one rule is specified, all enabled rules will be tested. This commit adds a cache so that 'ntfs_find' iterations use the results of the first 'yara_rule' met. # Conflicts: # src/OrcLib/FileFind.cpp
1 parent 10b641a commit 7f128bd

File tree

2 files changed

+209
-4
lines changed

2 files changed

+209
-4
lines changed

src/OrcLib/FileFind.cpp

+183-2
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,26 @@ std::wstring GetFileName(const Orc::MFTRecord& record, const Orc::DataAttribute&
6161
return fmt::format(L"{}:{}", filename.empty() ? kUnknown : filename, attributeName);
6262
}
6363

64+
/// Build a display name for the data attribute stored at @p dataAttributeIndex in @p record.
///
/// Index-based convenience overload: it validates the index and the attribute slot, then forwards
/// to the GetFileName(record, dataAttribute) overload.
///
/// @param record              MFT record whose data attributes are inspected.
/// @param dataAttributeIndex  Position of the attribute in record.GetDataAttributes().
/// @return the name produced by the attribute-based overload, or L"<Unknown>" when the index is
///         out of range or the slot holds a null attribute (an error is logged in both cases).
std::wstring GetFileName(const Orc::MFTRecord& record, size_t dataAttributeIndex)
{
    const auto kUnknown = L"<Unknown>"sv;

    // Fetch the attribute list once and reuse it for both the bounds check and the lookup.
    const auto& dataAttributes = record.GetDataAttributes();
    if (dataAttributeIndex >= dataAttributes.size())
    {
        Log::Error("{}: unexpected data attribute index", __FUNCTION__);
        return std::wstring(kUnknown);
    }

    // Borrow the element rather than copying it: the attribute is only read here, so taking a
    // copy of the owning pointer would be wasted work.
    const auto& dataAttribute = dataAttributes[dataAttributeIndex];
    if (!dataAttribute)
    {
        Log::Error("{}: unexpected nullptr data attribute", __FUNCTION__);
        return std::wstring(kUnknown);
    }

    return GetFileName(record, *dataAttribute);
}
83+
6484
uint64_t SumStreamsReadLength(const MFTRecord& record, const std::shared_ptr<VolumeReader>& volumeReader)
6585
{
6686
uint64_t sum = 0;
@@ -132,6 +152,12 @@ std::shared_ptr<ByteStream> GetOptimalStream(const std::shared_ptr<ByteStream> s
132152
return memstream;
133153
}
134154

155+
inline bool IsEqual(const FILE_REFERENCE& ref1, const FILE_REFERENCE& ref2)
156+
{
157+
return ref1.SegmentNumberHighPart == ref2.SegmentNumberHighPart
158+
&& ref1.SegmentNumberLowPart == ref2.SegmentNumberLowPart && ref1.SequenceNumber == ref2.SequenceNumber;
159+
}
160+
135161
} // namespace
136162

137163
std::wregex& FileFind::DOSPattern()
@@ -3029,6 +3055,112 @@ FileFind::SearchTerm::Criteria FileFind::MatchContains(
30293055
return matchedSpec;
30303056
}
30313057

3058+
/// Scan one data attribute of @p record with the Yara scanner, reusing a cached result when possible.
///
/// The cache is consulted first; on a miss the attribute's data stream is rewound, wrapped by
/// GetOptimalStream() and handed to m_YaraScan. A successful scan result is stored in
/// m_yaraMatchCache before being returned.
///
/// @param record              MFT record owning the data attribute.
/// @param dataAttributeIndex  Position of the attribute in record.GetDataAttributes().
/// @return the collection of matching rules reported by the scanner (possibly empty), or
///         - std::errc::resource_unavailable_try_again when the Yara scanner is not initialized,
///         - std::errc::invalid_argument when the index is out of range or the slot is null,
///         - std::errc::io_error when the attribute provides no data stream,
///         - the error converted from the HRESULT when seeking or scanning fails.
Result<MatchingRuleCollection>
FileFind::FileFindMatchAllYaraRules(const Orc::MFTRecord& record, size_t dataAttributeIndex) const
{
    // Deliberately non-const: the optional is a private copy of the cache entry, so its content
    // can be moved into the result instead of being copied a second time.
    auto matchingRulesCache = m_yaraMatchCache.Get(record, dataAttributeIndex);
    if (matchingRulesCache)
    {
        return std::move(*matchingRulesCache);
    }

    if (!m_YaraScan)
    {
        Log::Error("Yara not initialized");
        return std::errc::resource_unavailable_try_again;
    }

    const auto& dataAttributes = record.GetDataAttributes();
    if (dataAttributeIndex >= dataAttributes.size())
    {
        Log::Error("{}: unexpected data attribute index", __FUNCTION__);
        return std::errc::invalid_argument;
    }

    // Same guard as the index-based GetFileName() helper: never dereference an empty slot.
    const auto& dataAttribute = dataAttributes[dataAttributeIndex];
    if (!dataAttribute)
    {
        Log::Error("{}: unexpected nullptr data attribute", __FUNCTION__);
        return std::errc::invalid_argument;
    }

    auto dataStream = dataAttribute->GetDataStream(m_pVolReader);
    if (dataStream == nullptr)
    {
        return std::errc::io_error;
    }

    // Rewind exactly once so the scan starts at the beginning of the stream.
    const HRESULT hr = dataStream->SetFilePointer(0LL, SEEK_SET, nullptr);
    if (FAILED(hr))
    {
        auto ec = SystemError(hr);
        Log::Error(L"Failed Yara scan while seeking on '{}' [{}]", ::GetFileName(record, *dataAttribute), ec);
        return ec;
    }

    auto stream = ::GetOptimalStream(dataStream, 1024 * 1024 * 32);

    auto [hrScan, matchingRules] = m_YaraScan->Scan(stream);
    if (FAILED(hrScan))
    {
        auto ec = SystemError(hrScan);
        Log::Error(
            L"Failed Yara scan on '{}' (frn: {:#x}) [{}]",
            ::GetFileName(record, *dataAttribute),
            NtfsFullSegmentNumber(&record.GetFileReferenceNumber()),
            ec);
        return ec;
    }

    // Only successful scans are cached; failures are re-attempted on the next call.
    m_yaraMatchCache.Set(record, dataAttributeIndex, matchingRules);

    return matchingRules;
}
3112+
3113+
std::pair<Orc::FileFind::SearchTerm::Criteria, std::optional<MatchingRuleCollection>> Orc::FileFind::MatchYara(
3114+
const std::shared_ptr<SearchTerm>& aTerm,
3115+
const Orc::MFTRecord& record,
3116+
size_t dataAttributeIndex) const
3117+
{
3118+
if (!(aTerm->Required & SearchTerm::Criteria::YARA))
3119+
{
3120+
Log::Debug("{}: Unexpected call as term does not evaluate Yara", __FUNCTION__);
3121+
return {SearchTerm::Criteria::NONE, std::nullopt};
3122+
}
3123+
3124+
auto rv = FileFind::FileFindMatchAllYaraRules(record, dataAttributeIndex);
3125+
if (!rv)
3126+
{
3127+
Log::Critical(
3128+
L"Failed Yara on '{}' (frn: {:#x})",
3129+
::GetFileName(record, dataAttributeIndex),
3130+
NtfsFullSegmentNumber(&record.GetFileReferenceNumber()));
3131+
return {SearchTerm::Criteria::NONE, std::nullopt};
3132+
}
3133+
3134+
auto& matchingRules = *rv;
3135+
3136+
if (matchingRules.empty())
3137+
{
3138+
Log::Debug("No matching Yara rule");
3139+
return {SearchTerm::Criteria::NONE, std::nullopt};
3140+
}
3141+
3142+
if (aTerm->YaraRules.empty())
3143+
{
3144+
Log::Critical("Unexpected empty Yara rule in aTerm");
3145+
return {SearchTerm::Criteria::YARA, std::nullopt};
3146+
}
3147+
3148+
for (const auto& termRule : aTerm->YaraRules)
3149+
{
3150+
for (const auto& matchingRule : matchingRules)
3151+
{
3152+
if (PathMatchSpecA(matchingRule.c_str(), termRule.c_str()))
3153+
{
3154+
// Legacy: return all matching rules as one of requested is matching
3155+
return {SearchTerm::Criteria::YARA, std::move(matchingRules)};
3156+
}
3157+
}
3158+
}
3159+
3160+
return {SearchTerm::Criteria::NONE, std::nullopt};
3161+
}
3162+
3163+
// TODO: remove this legacy function
30323164
std::pair<Orc::FileFind::SearchTerm::Criteria, std::optional<MatchingRuleCollection>> Orc::FileFind::MatchYara(
30333165
const std::shared_ptr<SearchTerm>& aTerm,
30343166
const Orc::MFTRecord& record,
@@ -3233,8 +3365,12 @@ FileFind::SearchTerm::Criteria FileFind::AddMatchingData(
32333365
SearchTerm::Criteria requiredDataSpecs =
32343366
static_cast<SearchTerm::Criteria>(aTerm->Required & SearchTerm::DataMask());
32353367
SearchTerm::Criteria retval = SearchTerm::Criteria::NONE;
3236-
for (const auto& data_attr : pElt->GetDataAttributes())
3368+
3369+
const auto& dataAttributes = pElt->GetDataAttributes();
3370+
for (size_t dataAttributeIndex = 0; dataAttributeIndex < dataAttributes.size(); ++dataAttributeIndex)
32373371
{
3372+
const auto& data_attr = dataAttributes[dataAttributeIndex];
3373+
32383374
if (::IsExcludedDataAttribute(*pElt, *data_attr))
32393375
{
32403376
continue;
@@ -3285,7 +3421,7 @@ FileFind::SearchTerm::Criteria FileFind::AddMatchingData(
32853421
}
32863422
if (requiredDataSpecs & SearchTerm::Criteria::YARA)
32873423
{
3288-
auto [aSpec, matched] = MatchYara(aTerm, *pElt, data_attr);
3424+
auto [aSpec, matched] = MatchYara(aTerm, *pElt, dataAttributeIndex);
32893425
if (matched.has_value())
32903426
std::swap(matchedRules, matched.value());
32913427
if (aSpec == SearchTerm::Criteria::NONE)
@@ -4310,3 +4446,48 @@ const std::vector<FileFind::SearchTerm::Ptr>& FileFind::AllSearchTerms() const
43104446
{
43114447
return m_AllTerms;
43124448
}
4449+
4450+
FileFind::YaraMatchCache::YaraMatchCache()
4451+
: m_frn({0})
4452+
, m_match()
4453+
{
4454+
}
4455+
4456+
std::optional<MatchingRuleCollection>
4457+
FileFind::YaraMatchCache::Get(const MFTRecord& record, size_t dataAttributeIndex) const
4458+
{
4459+
if (!::IsEqual(record.GetFileReferenceNumber(), m_frn))
4460+
{
4461+
Log::Debug("Yara match cache miss: invalid frn");
4462+
return {};
4463+
}
4464+
4465+
if (dataAttributeIndex >= m_match.size())
4466+
{
4467+
Log::Debug("Yara match cache miss: invalid index");
4468+
return {};
4469+
}
4470+
4471+
Log::Debug("Yara match cache hit");
4472+
return m_match[dataAttributeIndex];
4473+
}
4474+
4475+
void FileFind::YaraMatchCache::Set(const MFTRecord& record, size_t dataAttributeIndex, MatchingRuleCollection match)
4476+
{
4477+
if (!::IsEqual(record.GetFileReferenceNumber(), m_frn))
4478+
{
4479+
Log::Debug("Yara match cache clear");
4480+
m_match.clear();
4481+
m_match.resize(record.GetDataAttributes().size());
4482+
}
4483+
4484+
if (dataAttributeIndex >= m_match.size())
4485+
{
4486+
Log::Error("Yara match cache miss: unexpected and invalid index");
4487+
return;
4488+
}
4489+
4490+
Log::Debug("Yara match cache update");
4491+
m_frn = record.GetFileReferenceNumber();
4492+
m_match[dataAttributeIndex] = std::move(match);
4493+
}

src/OrcLib/FileFind.h

+26-2
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,7 @@ class FileFind
478478
CryptoHashStream::Algorithm matchHash = CryptoHashStream::Algorithm::Undefined,
479479
bool storeMatches = true)
480480
: m_FullNameBuilder(nullptr)
481+
, m_yaraMatchCache()
481482
, m_bProvideStream(bProvideStream)
482483
, m_MatchHash(matchHash)
483484
, m_storeMatches(storeMatches)
@@ -554,6 +555,24 @@ class FileFind
554555

555556
std::vector<std::shared_ptr<Match>> m_Matches;
556557

558+
// For each file record FileFind will iterate over terms. For each term it will iterate over data attributes and
559+
// eventually run Yara match. Cache must persist while iterating over terms. FRN and dataAttribute index are used
560+
// as key to resolve cache entry.
561+
class YaraMatchCache
562+
{
563+
public:
564+
YaraMatchCache();
565+
566+
std::optional<MatchingRuleCollection> Get(const Orc::MFTRecord& record, size_t dataAttributeIndex) const;
567+
void Set(const Orc::MFTRecord& record, size_t dataAttributeIndex, MatchingRuleCollection match);
568+
569+
private:
570+
FILE_REFERENCE m_frn;
571+
std::vector<std::optional<MatchingRuleCollection>> m_match; // index map to Data's attribute index
572+
};
573+
574+
mutable YaraMatchCache m_yaraMatchCache;
575+
557576
bool m_bProvideStream = false;
558577
CryptoHashStream::Algorithm m_MatchHash = CryptoHashStream::Algorithm::Undefined;
559578

@@ -646,13 +665,18 @@ class FileFind
646665
MatchHash(const std::shared_ptr<SearchTerm>& aTerm, const std::shared_ptr<DataAttribute>& pDataAttr) const;
647666
SearchTerm::Criteria
648667
MatchContains(const std::shared_ptr<SearchTerm>& aTerm, const std::shared_ptr<DataAttribute>& pDataAttr) const;
649-
std::pair<SearchTerm::Criteria, std::optional<MatchingRuleCollection>>
650668

651-
MatchYara(
669+
std::pair<SearchTerm::Criteria, std::optional<MatchingRuleCollection>> MatchYara(
652670
const std::shared_ptr<SearchTerm>& aTerm,
653671
const Orc::MFTRecord& record,
654672
const std::shared_ptr<DataAttribute>& pDataAttr) const;
655673

674+
std::pair<Orc::FileFind::SearchTerm::Criteria, std::optional<MatchingRuleCollection>>
675+
MatchYara(const std::shared_ptr<SearchTerm>& aTerm, const Orc::MFTRecord& record, size_t dataAttributeIndex) const;
676+
677+
Result<MatchingRuleCollection>
678+
FileFindMatchAllYaraRules(const Orc::MFTRecord& record, size_t dataAttributeIndex) const;
679+
656680
SearchTerm::Criteria AddMatchingData(
657681
const std::shared_ptr<SearchTerm>& aTerm,
658682
SearchTerm::Criteria required,

0 commit comments

Comments
 (0)