Linux Archive

Linux Archive (http://www.linux-archive.org/)
-   ArchLinux Pacman Development (http://www.linux-archive.org/archlinux-pacman-development/)
-   -   Allow querying directory ownership (http://www.linux-archive.org/archlinux-pacman-development/669228-allow-querying-directory-ownership.html)

Allan McRae 05-21-2012 03:22 PM

Allow querying directory ownership
 
The restriction of not checking the ownership of a directory is
unnecessary given that all the package filelists contain this
information. Remove this restriction, with the expectation that you
might get multiple packages returned for a given directory.
Additionally attempt to minimise the number of files getting through
to the slow realpath call.

This combines ideas from two patches that have been around for a long
time.

Original-work-by: Andrew Gregory <andrew.gregory.8@gmail.com>
Original-work-by: Dan McGee <dan@archlinux.org>
Signed-off-by: Allan McRae <allan@archlinux.org>
---

Not having this feature is beginning to annoy me! Hopefully I have
captured the important bits of both patches...

src/pacman/query.c | 38 +++++++++++++++++++++++++++-----------
1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/src/pacman/query.c b/src/pacman/query.c
index 464efbf..beaf76c 100644
--- a/src/pacman/query.c
+++ b/src/pacman/query.c
@@ -113,6 +113,7 @@ static int query_fileowner(alpm_list_t *targets)
size_t rootlen;
alpm_list_t *t;
alpm_db_t *db_local;
+ alpm_list_t *packages;

/* This code is here for safety only */
if(targets == NULL) {
@@ -133,13 +134,14 @@ static int query_fileowner(alpm_list_t *targets)
strcpy(path, root);

db_local = alpm_get_localdb(config->handle);
+ packages = alpm_db_get_pkgcache(db_local);

for(t = targets; t; t = alpm_list_next(t)) {
char *filename, *dname, *rpath;
const char *bname;
struct stat buf;
alpm_list_t *i;
- int found = 0;
+ size_t found = 0, isdir, bname_len, dname_len;

filename = strdup(t->data);

@@ -162,16 +164,20 @@ static int query_fileowner(alpm_list_t *targets)
}
}

- if(S_ISDIR(buf.st_mode)) {
- pm_printf(ALPM_LOG_ERROR,
- _("cannot determine ownership of directory '%s'
"), filename);
- ret++;
- free(filename);
- continue;
+ /* make sure directories have a trailing '/' */
+ if((isdir = S_ISDIR(buf.st_mode))) {
+ size_t len = strlen(filename);
+ if(filename[len-1] != '/') {
+ filename = realloc(filename, sizeof(char) * (len + 2));
+ strcat(filename, "/");
+ }
}

bname = mbasename(filename);
+ bname_len = strlen(bname);
dname = mdirname(filename);
+ dname_len = strlen(dname);
+
/* for files in '/', there is no directory name to match */
if(strcmp(dname, "") == 0) {
rpath = NULL;
@@ -190,7 +196,7 @@ static int query_fileowner(alpm_list_t *targets)
}
free(dname);

- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
+ for(i = packages; i && (!found || isdir); i = alpm_list_next(i)) {
alpm_pkg_t *info = i->data;
alpm_filelist_t *filelist = alpm_pkg_get_files(info);
size_t j;
@@ -199,17 +205,27 @@ static int query_fileowner(alpm_list_t *targets)
const alpm_file_t *file = filelist->files + j;
char *ppath, *pdname;
const char *pkgfile = file->name;
+ size_t pkgfile_len = strlen(pkgfile);

- /* avoid the costly resolve_path usage if the basenames don't match */
- if(strcmp(mbasename(pkgfile), bname) != 0) {
+ /* avoid the costly resolve_path usage if the basenames don't match;
+ * we can also cheat by comparing the final characters first and avoid
+ * a full string comparison */
+ if(!isdir && (pkgfile[pkgfile_len - 1] != bname[bname_len - 1] ||
+ strcmp(mbasename(pkgfile), bname) != 0)) {
continue;
+ } else if(isdir) {
+ /* database path needs trailing slash */
+ if(pkgfile[pkgfile_len - 1] != '/' &&
+ pkgfile[pkgfile_len - 2] != dname[dname_len - 2]) {
+ continue;
+ }
}

/* for files in '/', there is no directory name to match */
if(!rpath) {
print_query_fileowner(filename, info);
found = 1;
- continue;
+ break;
}

if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
--
1.7.10.2

Allan McRae 05-21-2012 03:27 PM

Allow querying directory ownership
 
On 22/05/12 01:22, Allan McRae wrote:
> The restriction of not checking the ownership of a directory is
> unnecessary given that all the package filelists contain this
> information. Remove this restriction, with the expectation that you
> might get multiple packages returned for a given directory.
> Additionally attempt to minimise the number of files getting through
> to the slow realpath call.
>
> This combines ideas from two patches that have been around for a long
> time.
>
> Original-work-by: Andrew Gregory <andrew.gregory.8@gmail.com>
> Original-work-by: Dan McGee <dan@archlinux.org>
> Signed-off-by: Allan McRae <allan@archlinux.org>
> ---
>
> Not having this feature is beginning to annoy me! Hopefully I have
> captured the important bits of both patches...
>
> src/pacman/query.c | 38 +++++++++++++++++++++++++++-----------
> 1 file changed, 27 insertions(+), 11 deletions(-)
>
> diff --git a/src/pacman/query.c b/src/pacman/query.c
> index 464efbf..beaf76c 100644
> --- a/src/pacman/query.c
> +++ b/src/pacman/query.c
> @@ -113,6 +113,7 @@ static int query_fileowner(alpm_list_t *targets)
> size_t rootlen;
> alpm_list_t *t;
> alpm_db_t *db_local;
> + alpm_list_t *packages;
>
> /* This code is here for safety only */
> if(targets == NULL) {
> @@ -133,13 +134,14 @@ static int query_fileowner(alpm_list_t *targets)
> strcpy(path, root);
>
> db_local = alpm_get_localdb(config->handle);
> + packages = alpm_db_get_pkgcache(db_local);
>
> for(t = targets; t; t = alpm_list_next(t)) {
> char *filename, *dname, *rpath;
> const char *bname;
> struct stat buf;
> alpm_list_t *i;
> - int found = 0;
> + size_t found = 0, isdir, bname_len, dname_len;
>
> filename = strdup(t->data);
>
> @@ -162,16 +164,20 @@ static int query_fileowner(alpm_list_t *targets)
> }
> }
>
> - if(S_ISDIR(buf.st_mode)) {
> - pm_printf(ALPM_LOG_ERROR,
> - _("cannot determine ownership of directory '%s'
"), filename);
> - ret++;
> - free(filename);
> - continue;
> + /* make sure directories have a trailing '/' */
> + if((isdir = S_ISDIR(buf.st_mode))) {
> + size_t len = strlen(filename);
> + if(filename[len-1] != '/') {
> + filename = realloc(filename, sizeof(char) * (len + 2));
> + strcat(filename, "/");
> + }
> }
>
> bname = mbasename(filename);
> + bname_len = strlen(bname);
> dname = mdirname(filename);
> + dname_len = strlen(dname);
> +
> /* for files in '/', there is no directory name to match */
> if(strcmp(dname, "") == 0) {
> rpath = NULL;
> @@ -190,7 +196,7 @@ static int query_fileowner(alpm_list_t *targets)
> }
> free(dname);
>
> - for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
> + for(i = packages; i && (!found || isdir); i = alpm_list_next(i)) {
> alpm_pkg_t *info = i->data;
> alpm_filelist_t *filelist = alpm_pkg_get_files(info);
> size_t j;
> @@ -199,17 +205,27 @@ static int query_fileowner(alpm_list_t *targets)
> const alpm_file_t *file = filelist->files + j;
> char *ppath, *pdname;
> const char *pkgfile = file->name;
> + size_t pkgfile_len = strlen(pkgfile);
>
> - /* avoid the costly resolve_path usage if the basenames don't match */
> - if(strcmp(mbasename(pkgfile), bname) != 0) {
> + /* avoid the costly resolve_path usage if the basenames don't match;
> + * we can also cheat by comparing the final characters first and avoid
> + * a full string comparison */
> + if(!isdir && (pkgfile[pkgfile_len - 1] != bname[bname_len - 1] ||
> + strcmp(mbasename(pkgfile), bname) != 0)) {
> continue;
> + } else if(isdir) {
> + /* database path needs trailing slash */
> + if(pkgfile[pkgfile_len - 1] != '/' &&
> + pkgfile[pkgfile_len - 2] != dname[dname_len - 2]) {

BAD! Will fix...

Allan McRae 05-21-2012 10:28 PM

Allow querying directory ownership
 
The restriction of not checking the ownership of a directory is
unnecessary given that all the package filelists contain this
information. Remove this restriction, with the expectation that you
might get multiple packages returned for a given directory.
Additionally attempt to minimise the number of files getting through
to the slow realpath call.

This combines ideas from two patches that have been around for a long
time.

Original-work-by: Andrew Gregory <andrew.gregory.8@gmail.com>
Original-work-by: Dan McGee <dan@archlinux.org>
Signed-off-by: Allan McRae <allan@archlinux.org>
---
src/pacman/query.c | 58 +++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 46 insertions(+), 12 deletions(-)

diff --git a/src/pacman/query.c b/src/pacman/query.c
index 07509fa..cf14fab 100644
--- a/src/pacman/query.c
+++ b/src/pacman/query.c
@@ -113,6 +113,7 @@ static int query_fileowner(alpm_list_t *targets)
size_t rootlen;
alpm_list_t *t;
alpm_db_t *db_local;
+ alpm_list_t *packages;

/* This code is here for safety only */
if(targets == NULL) {
@@ -133,13 +134,14 @@ static int query_fileowner(alpm_list_t *targets)
strcpy(path, root);

db_local = alpm_get_localdb(config->handle);
+ packages = alpm_db_get_pkgcache(db_local);

for(t = targets; t; t = alpm_list_next(t)) {
char *filename, *dname, *rpath;
- const char *bname;
+ const char *bname, *rpath_bname = NULL;
struct stat buf;
alpm_list_t *i;
- int found = 0;
+ size_t found = 0, isdir, bname_len, rpath_len, rpath_bname_len = 0;

filename = strdup(t->data);

@@ -162,16 +164,19 @@ static int query_fileowner(alpm_list_t *targets)
}
}

- if(S_ISDIR(buf.st_mode)) {
- pm_printf(ALPM_LOG_ERROR,
- _("cannot determine ownership of directory '%s'
"), filename);
- ret++;
- free(filename);
- continue;
+ /* make sure directories have a trailing '/' */
+ if((isdir = S_ISDIR(buf.st_mode))) {
+ size_t len = strlen(filename);
+ if(filename[len-1] != '/') {
+ filename = realloc(filename, sizeof(char) * (len + 2));
+ strcat(filename, "/");
+ }
}

bname = mbasename(filename);
+ bname_len = strlen(bname);
dname = mdirname(filename);
+
/* for files in '/', there is no directory name to match */
if(strcmp(dname, "") == 0) {
rpath = NULL;
@@ -189,8 +194,14 @@ static int query_fileowner(alpm_list_t *targets)
}
}
free(dname);
+ rpath_len = strlen(rpath);

- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
+ if(isdir) {
+ rpath_bname = mbasename(rpath);
+ rpath_bname_len = strlen(rpath_bname);
+ }
+
+ for(i = packages; i && (!found || isdir); i = alpm_list_next(i)) {
alpm_pkg_t *info = i->data;
alpm_filelist_t *filelist = alpm_pkg_get_files(info);
size_t j;
@@ -199,17 +210,40 @@ static int query_fileowner(alpm_list_t *targets)
const alpm_file_t *file = filelist->files + j;
char *ppath, *pdname;
const char *pkgfile = file->name;
+ size_t pkgfile_len = strlen(pkgfile);

- /* avoid the costly resolve_path usage if the basenames don't match */
- if(strcmp(mbasename(pkgfile), bname) != 0) {
+ /* avoid the costly resolve_path usage if the basenames don't match;
+ * we can also cheat by comparing the final characters first and avoid
+ * a full string comparison */
+ if(!isdir && (pkgfile[pkgfile_len - 1] != bname[bname_len - 1] ||
+ strcmp(mbasename(pkgfile), bname) != 0)) {
continue;
}

+ if(isdir) {
+ /* check the filelist entry is a directory using its
+ * trailing '/' and compare final characters of directory
+ * names. */
+ if(pkgfile[pkgfile_len - 1] != '/' ||
+ pkgfile[pkgfile_len - 2] != rpath[rpath_len - 1]) {
+ continue;
+ }
+
+ /* compare final directory portion */
+ if(pkgfile_len - 1 < rpath_bname_len ||
+ (pkgfile_len - 2 >= rpath_bname_len &&
+ pkgfile[pkgfile_len - 2 - rpath_bname_len] != '/') ||
+ strncmp(pkgfile + pkgfile_len - 1 - rpath_bname_len,
+ rpath_bname, rpath_bname_len)) {
+ continue;
+ }
+ }
+
/* for files in '/', there is no directory name to match */
if(!rpath) {
print_query_fileowner(filename, info);
found = 1;
- continue;
+ break;
}

if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
--
1.7.10.2

Andrew Gregory 05-22-2012 01:20 AM

Allow querying directory ownership
 
On Tue, 22 May 2012 08:28:11 +1000
Allan McRae <allan@archlinux.org> wrote:

> The restriction of not checking the ownership of a directory is
> unnecessary given that all the package filelists contain this
> information. Remove this restriction, with the expectation that you
> might get multiple packages returned for a given directory.
> Additionally attempt to minimise the number of files getting through
> to the slow realpath call.
>
> This combines ideas from two patches that have been around for a long
> time.
>
> Original-work-by: Andrew Gregory <andrew.gregory.8@gmail.com>
> Original-work-by: Dan McGee <dan@archlinux.org>
> Signed-off-by: Allan McRae <allan@archlinux.org>

I actually decided to try tackling this again recently too. If
you're interested, you can see the most recent incarnation here:
http://bitbucket.org/andrewgregory/pacman/changesets/tip/directory_owner
It gets rid of a lot of the basename/dirname stuff which I think makes
it a lot easier to follow.

Allan McRae 05-22-2012 02:11 AM

Allow querying directory ownership
 
The restriction of not checking the ownership of a directory is
unnecessary given that all the package filelists contain this
information. Remove this restriction, with the expectation that you
might get multiple packages returned for a given directory.
Additionally attempt to minimise the number of files getting through
to the slow realpath call.

This combines ideas from two patches that have been around for a long
time.

Original-work-by: Andrew Gregory <andrew.gregory.8@gmail.com>
Original-work-by: Dan McGee <dan@archlinux.org>
Signed-off-by: Allan McRae <allan@archlinux.org>
---

v3 - Now with less segfaults on the rather silly "pacman -Qo /"...

src/pacman/query.c | 70 +++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 55 insertions(+), 15 deletions(-)

diff --git a/src/pacman/query.c b/src/pacman/query.c
index 07509fa..67dd11c 100644
--- a/src/pacman/query.c
+++ b/src/pacman/query.c
@@ -113,6 +113,7 @@ static int query_fileowner(alpm_list_t *targets)
size_t rootlen;
alpm_list_t *t;
alpm_db_t *db_local;
+ alpm_list_t *packages;

/* This code is here for safety only */
if(targets == NULL) {
@@ -133,13 +134,14 @@ static int query_fileowner(alpm_list_t *targets)
strcpy(path, root);

db_local = alpm_get_localdb(config->handle);
+ packages = alpm_db_get_pkgcache(db_local);

for(t = targets; t; t = alpm_list_next(t)) {
char *filename, *dname, *rpath;
- const char *bname;
+ const char *bname, *rpath_bname = NULL;
struct stat buf;
alpm_list_t *i;
- int found = 0;
+ size_t found = 0, isdir, bname_len, rpath_len = 0, rpath_bname_len = 0;

filename = strdup(t->data);

@@ -162,16 +164,19 @@ static int query_fileowner(alpm_list_t *targets)
}
}

- if(S_ISDIR(buf.st_mode)) {
- pm_printf(ALPM_LOG_ERROR,
- _("cannot determine ownership of directory '%s'
"), filename);
- ret++;
- free(filename);
- continue;
+ /* make sure directories have a trailing '/' */
+ if((isdir = S_ISDIR(buf.st_mode))) {
+ size_t len = strlen(filename);
+ if(filename[len-1] != '/') {
+ filename = realloc(filename, sizeof(char) * (len + 2));
+ strcat(filename, "/");
+ }
}

bname = mbasename(filename);
+ bname_len = strlen(bname);
dname = mdirname(filename);
+
/* for files in '/', there is no directory name to match */
if(strcmp(dname, "") == 0) {
rpath = NULL;
@@ -181,16 +186,26 @@ static int query_fileowner(alpm_list_t *targets)
if(!rpath) {
pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
filename, strerror(errno));
- free(filename);
free(dname);
- free(rpath);
ret++;
- continue;
+ goto cleanup;
}
}
free(dname);

- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
+ if(isdir) {
+ if(!rpath) {
+ pm_printf(ALPM_LOG_ERROR, _("cannot determine ownership of root directory
"));
+ ret++;
+ goto cleanup;
+ } else {
+ rpath_len = strlen(rpath);
+ rpath_bname = mbasename(rpath);
+ rpath_bname_len = strlen(rpath_bname);
+ }
+ }
+
+ for(i = packages; i && (!found || isdir); i = alpm_list_next(i)) {
alpm_pkg_t *info = i->data;
alpm_filelist_t *filelist = alpm_pkg_get_files(info);
size_t j;
@@ -199,17 +214,40 @@ static int query_fileowner(alpm_list_t *targets)
const alpm_file_t *file = filelist->files + j;
char *ppath, *pdname;
const char *pkgfile = file->name;
+ size_t pkgfile_len = strlen(pkgfile);

- /* avoid the costly resolve_path usage if the basenames don't match */
- if(strcmp(mbasename(pkgfile), bname) != 0) {
+ /* avoid the costly resolve_path usage if the basenames don't match;
+ * we can also cheat by comparing the final characters first and avoid
+ * a full string comparison */
+ if(!isdir && (pkgfile[pkgfile_len - 1] != bname[bname_len - 1] ||
+ strcmp(mbasename(pkgfile), bname) != 0)) {
continue;
}

+ if(isdir) {
+ /* check the filelist entry is a directory using its
+ * trailing '/' and compare final characters of directory
+ * names. */
+ if(pkgfile[pkgfile_len - 1] != '/' ||
+ pkgfile[pkgfile_len - 2] != rpath[rpath_len - 1]) {
+ continue;
+ }
+
+ /* compare final directory portion */
+ if(pkgfile_len - 1 < rpath_bname_len ||
+ (pkgfile_len - 2 >= rpath_bname_len &&
+ pkgfile[pkgfile_len - 2 - rpath_bname_len] != '/') ||
+ strncmp(pkgfile + pkgfile_len - 1 - rpath_bname_len,
+ rpath_bname, rpath_bname_len)) {
+ continue;
+ }
+ }
+
/* for files in '/', there is no directory name to match */
if(!rpath) {
print_query_fileowner(filename, info);
found = 1;
- continue;
+ break;
}

if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
@@ -233,6 +271,8 @@ static int query_fileowner(alpm_list_t *targets)
pm_printf(ALPM_LOG_ERROR, _("No package owns %s
"), filename);
ret++;
}
+
+cleanup:
free(filename);
free(rpath);
}
--
1.7.10.2

Allan McRae 05-22-2012 06:38 AM

Allow querying directory ownership
 
On 22/05/12 11:20, Andrew Gregory wrote:
> On Tue, 22 May 2012 08:28:11 +1000
> Allan McRae <allan@archlinux.org> wrote:
>
>> The restriction of not checking the ownership of a directory is
>> unnecessary given that all the package filelists contain this
>> information. Remove this restriction, with the expectation that you
>> might get multiple packages returned for a given directory.
>> Additionally attempt to minimise the number of files getting through
>> to the slow realpath call.
>>
>> This combines ideas from two patches that have been around for a long
>> time.
>>
>> Original-work-by: Andrew Gregory <andrew.gregory.8@gmail.com>
>> Original-work-by: Dan McGee <dan@archlinux.org>
>> Signed-off-by: Allan McRae <allan@archlinux.org>
>
> I actually decided to try tackling this again recently too. If
> you're interested, you can see the most recent incarnation here:
> http://bitbucket.org/andrewgregory/pacman/changesets/tip/directory_owner
> It gets rid of a lot of the basename/dirname stuff which I think makes
> it a lot easier to follow.
>

I had a quick look through your patch. It is quite a change to the code
so there is not anything obvious for me to grab into this one.

How far would your patch be from being ready? If the answer is really
soon, and Dan likes the look of your changes, then I am quite happy for
"my" patch to be ignored.

Allan

Andrew Gregory 05-22-2012 02:02 PM

Allow querying directory ownership
 
On Tue, 22 May 2012 16:38:42 +1000
Allan McRae <allan@archlinux.org> wrote:

> On 22/05/12 11:20, Andrew Gregory wrote:
> > On Tue, 22 May 2012 08:28:11 +1000
> > Allan McRae <allan@archlinux.org> wrote:
> >
> >> The restriction of not checking the ownership of a directory is
> >> unnecessary given that all the package filelists contain this
> >> information. Remove this restriction, with the expectation that you
> >> might get multiple packages returned for a given directory.
> >> Additionally attempt to minimise the number of files getting
> >> through to the slow realpath call.
> >>
> >> This combines ideas from two patches that have been around for a
> >> long time.
> >>
> >> Original-work-by: Andrew Gregory <andrew.gregory.8@gmail.com>
> >> Original-work-by: Dan McGee <dan@archlinux.org>
> >> Signed-off-by: Allan McRae <allan@archlinux.org>
> >
> > I actually decided to try tackling this again recently too. If
> > you're interested, you can see the most recent incarnation here:
> > http://bitbucket.org/andrewgregory/pacman/changesets/tip/directory_owner
> > It gets rid of a lot of the basename/dirname stuff which I think
> > makes it a lot easier to follow.
> >
>
> I had a quick look through your patch. It is quite a change to the
> code so there is not anything obvious for me to grab into this one.
>
> How far would your patch be from being ready? If the answer is really
> soon, and Dan likes the look of your changes, then I am quite happy
> for "my" patch to be ignored.
>
> Allan
>

Unfortunately, in removing the dirname/basename stuff I built in the
assumption that the pkgfile paths will never include symlinks or
anything else that needs to be resolved. I'm not sure why they would,
but it's something we support at the moment. If everybody's ok with
adding that assumption, then the answer is really soon. If not, we
should go ahead with your patch. I do have a couple suggestions for
improving it though, which I'll send along separately.

Andrew Gregory 05-22-2012 03:22 PM

Allow querying directory ownership
 
From: Andrew Gregory <andrew.gregory.8@gmail.com>

---

I did this as a patch against your working branch since it requires
changes outside the scope of your original patch. This adds a check
that all files/directories are within root before we bother looking for
an owner which removes the need for a special check just for '/'. I
didn't test this extensively, but it handled `./pacman -Qo /
firefox /usr/bin/firefox /tmp /tmp/` correctly.

src/pacman/query.c | 51 +++++++++++++++++++++++++++------------------------
src/pacman/util.c | 4 ++++
2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/src/pacman/query.c b/src/pacman/query.c
index 67dd11c..d99d5df 100644
--- a/src/pacman/query.c
+++ b/src/pacman/query.c
@@ -109,7 +109,7 @@ static int query_fileowner(alpm_list_t *targets)
{
int ret = 0;
char path[PATH_MAX];
- const char *root;
+ char *root;
size_t rootlen;
alpm_list_t *t;
alpm_db_t *db_local;
@@ -124,9 +124,15 @@ static int query_fileowner(alpm_list_t *targets)
/* Set up our root path buffer. We only need to copy the location of root in
* once, then we can just overwrite whatever file was there on the previous
* iteration. */
- root = alpm_option_get_root(config->handle);
+ /* I don't know if we can assume that root doesn't contain symlinks,
+ * if not we should go ahead and resolve it */
+ root = strdup(resolve_path(alpm_option_get_root(config->handle)));
rootlen = strlen(root);
- if(rootlen + 1 > PATH_MAX) {
+ if(root[rootlen - 1] != '/' && rootlen + 1 < PATH_MAX){
+ strcat(root, "/");
+ rootlen++;
+ }
+ if(rootlen + 1 > PATH_MAX || root[rootlen - 1] != '/') {
/* we are in trouble here */
pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"), root, "");
return 1;
@@ -177,32 +183,28 @@ static int query_fileowner(alpm_list_t *targets)
bname_len = strlen(bname);
dname = mdirname(filename);

- /* for files in '/', there is no directory name to match */
- if(strcmp(dname, "") == 0) {
- rpath = NULL;
- } else {
- rpath = resolve_path(dname);
+ rpath = resolve_path(dname);

- if(!rpath) {
- pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
- filename, strerror(errno));
- free(dname);
- ret++;
- goto cleanup;
- }
+ if(!rpath) {
+ pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
+ dname, strerror(errno));
+ free(dname);
+ ret++;
+ goto cleanup;
}
free(dname);

+ if(strstr(root, rpath) && *bname == '') {
+ /* path is either outside root or *is* root */
+ pm_printf(ALPM_LOG_ERROR, _("cannot determine ownership of file outside root directory
"));
+ ret++;
+ goto cleanup;
+ }
+
if(isdir) {
- if(!rpath) {
- pm_printf(ALPM_LOG_ERROR, _("cannot determine ownership of root directory
"));
- ret++;
- goto cleanup;
- } else {
- rpath_len = strlen(rpath);
- rpath_bname = mbasename(rpath);
- rpath_bname_len = strlen(rpath_bname);
- }
+ rpath_len = strlen(rpath);
+ rpath_bname = mbasename(rpath);
+ rpath_bname_len = strlen(rpath_bname);
}

for(i = packages; i && (!found || isdir); i = alpm_list_next(i)) {
@@ -276,6 +278,7 @@ cleanup:
free(filename);
free(rpath);
}
+ free(root);

return ret;
}
diff --git a/src/pacman/util.c b/src/pacman/util.c
index 7f7f6a7..1914b70 100644
--- a/src/pacman/util.c
+++ b/src/pacman/util.c
@@ -242,6 +242,10 @@ char *mdirname(const char *path)
ret = strdup(path);
last = strrchr(ret, '/');

+ if(last == ret){
+ free(ret);
+ return strdup("/");
+ }
if(last != NULL) {
/* we found a '/', so terminate our string */
*last = '';
--
1.7.10.2

05-25-2012 12:42 AM

Allow querying directory ownership
 
From: Andrew Gregory <andrew.gregory.8@gmail.com>

The restriction of not checking the ownership of a directory is
unnecessary given that all the package filelists contain this
information.

Additional Changes:
* mbasename/mdirname behave more like their posix counterparts
* check that files are in root before looking for an owner
* only compare package files of the same type as the target file
* add lrealpath, like realpath but for symlinks
* resolve root and make sure it ends with '/'
* removed resolve_path()

Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com>
---

This is an alternative to Allan's recent patch. This pretty thoroughly
rewrites query_fileowner() and attempts to simplify the process in addition to
allowing querying directories. I mentioned before that my version assumed that
package file paths didn't contain symlinks; that assumption has been removed.

My public repo is available at:
https://bitbucket.org/andrewgregory/pacman/changesets/tip/dir_own_final
It includes a perl script in src/pacan used to test this patch against various
file/directory/symlink situations.

src/pacman/query.c | 185 ++++++++++++++++++++++++++++------------------------
src/pacman/util.c | 128 ++++++++++++++++++++++++++++--------
src/pacman/util.h | 4 +-
3 files changed, 205 insertions(+), 112 deletions(-)

diff --git a/src/pacman/query.c b/src/pacman/query.c
index a1cc1cf..6ef0744 100644
--- a/src/pacman/query.c
+++ b/src/pacman/query.c
@@ -36,23 +36,6 @@
#include "conf.h"
#include "util.h"

-static char *resolve_path(const char *file)
-{
- char *str = NULL;
-
- str = calloc(PATH_MAX, sizeof(char));
- if(!str) {
- return NULL;
- }
-
- if(!realpath(file, str)) {
- free(str);
- return NULL;
- }
-
- return str;
-}
-
/* check if filename exists in PATH */
static int search_path(char **filename, struct stat *bufptr)
{
@@ -107,134 +90,166 @@ static void print_query_fileowner(const char *filename, alpm_pkg_t *info)

static int query_fileowner(alpm_list_t *targets)
{
- int ret = 0;
- char path[PATH_MAX];
- const char *root;
- size_t rootlen;
- alpm_list_t *t;
- alpm_db_t *db_local;
-
- /* This code is here for safety only */
if(targets == NULL) {
pm_printf(ALPM_LOG_ERROR, _("no file was specified for --owns
"));
return 1;
}

- /* Set up our root path buffer. We only need to copy the location of root in
- * once, then we can just overwrite whatever file was there on the previous
- * iteration. */
- root = alpm_option_get_root(config->handle);
+ struct stat buf;
+ char root[PATH_MAX];
+ size_t rootlen;
+ char path[PATH_MAX]; /* absolute real path of the target */
+ char ppath[PATH_MAX]; /* absolute real path of the package file */
+ int ret = 0;
+
+ /* make sure root is a real path */
+ if(realpath(alpm_option_get_root(config->handle), root) == NULL) {
+ pm_printf(ALPM_LOG_ERROR,
+ _("cannot determine real path for '%s': %s
"),
+ alpm_option_get_root(config->handle), strerror(errno));
+ return 1;
+ };
rootlen = strlen(root);
- if(rootlen + 1 > PATH_MAX) {
- /* we are in trouble here */
+ /* make sure root ends with '/' */
+ if(root[rootlen - 1] != '/' && rootlen < PATH_MAX) {
+ root[rootlen] = '/';
+ rootlen++;
+ root[rootlen] = '';
+ }
+ /* make sure we actually have room to append the package files */
+ if(rootlen >= PATH_MAX) {
pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"), root, "");
return 1;
}
- strcpy(path, root);

- db_local = alpm_get_localdb(config->handle);
+ alpm_db_t *db_local = alpm_get_localdb(config->handle);
+ alpm_list_t *packages = alpm_db_get_pkgcache(db_local);

+ /* loop through targets */
+ alpm_list_t *t;
for(t = targets; t; t = alpm_list_next(t)) {
- char *filename, *dname, *rpath;
- const char *bname;
- struct stat buf;
- alpm_list_t *i;
+ char *filename = strdup(t->data);
+ char *bname;
int found = 0;
+ int is_dir = 0;

- filename = strdup(t->data);
-
+ /* read target */
if(lstat(filename, &buf) == -1) {
- /* if it is not a path but a program name, then check in PATH */
if(strchr(filename, '/') == NULL) {
+ /* if it is not a path but a program name, then check in PATH */
if(search_path(&filename, &buf) == -1) {
- pm_printf(ALPM_LOG_ERROR, _("failed to find '%s' in PATH: %s
"),
+ pm_printf(ALPM_LOG_ERROR,
+ _("failed to find '%s' in PATH: %s
"),
filename, strerror(errno));
ret++;
free(filename);
continue;
}
} else {
+ /* filename is a path we can't read so bail */
+ pm_printf(ALPM_LOG_ERROR, _("failed to read file '%s': %s
"),
+ filename, strerror(errno));
+ ret++;
+ free(filename);
+ continue;
+ }
+ }
+ if((is_dir = S_ISDIR(buf.st_mode))){
+ /* make sure filename doesn't end with '/' because
+ * lstat('/var/mail') is not the same as lstat("/var/mail/") */
+ size_t flast = strlen(filename) - 1;
+ while(filename[flast] == '/' && flast > 0) {
+ filename[flast] = '';
+ flast--;
+ }
+ if(lstat(filename, &buf) == -1) {
pm_printf(ALPM_LOG_ERROR, _("failed to read file '%s': %s
"),
filename, strerror(errno));
ret++;
free(filename);
continue;
}
+ /* check if filename is actually a symlink */
+ is_dir = S_ISDIR(buf.st_mode);
}

- if(S_ISDIR(buf.st_mode)) {
+ /* convert filename to its real path */
+ if(lrealpath(filename, path) == NULL) {
pm_printf(ALPM_LOG_ERROR,
- _("cannot determine ownership of directory '%s'
"), filename);
+ _("cannot determine real path for '%s': %s
"),
+ filename, strerror(errno));
ret++;
free(filename);
continue;
}

- bname = mbasename(filename);
- dname = mdirname(filename);
- /* for files in '/', there is no directory name to match */
- if(strcmp(dname, "") == 0) {
- rpath = NULL;
- } else {
- rpath = resolve_path(dname);
-
- if(!rpath) {
- pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
- filename, strerror(errno));
- free(filename);
- free(dname);
- free(rpath);
- ret++;
- continue;
- }
+ /* make sure target is inside root */
+ if( strlen(path) <= rootlen || strncmp(path, root, rootlen) != 0) {
+ pm_printf(ALPM_LOG_ERROR,
+ _("cannot determine ownership of file outside of root
"));
+ ret++;
+ free(filename);
+ continue;
}
- free(dname);

- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
+ bname = mbasename(path);
+
+ /* loop through installed packages */
+ alpm_list_t *i;
+ for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) {
alpm_pkg_t *info = i->data;
alpm_filelist_t *filelist = alpm_pkg_get_files(info);
- size_t j;

- for(j = 0; j < filelist->count; j++) {
+ /* loop through package files */
+ size_t j;
+ for(j = 0; j < filelist->count && (!found || is_dir); j++) {
const alpm_file_t *file = filelist->files + j;
- char *ppath, *pdname;
- const char *pkgfile = file->name;
+ size_t flen = strlen(file->name);

- /* avoid the costly resolve_path usage if the basenames don't match */
- if(strcmp(mbasename(pkgfile), bname) != 0) {
+ if(rootlen + flen >= PATH_MAX) {
+ pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"),
+ root, file->name);
+ ret++;
continue;
- }
+ };

- /* for files in '/', there is no directory name to match */
- if(!rpath) {
- print_query_fileowner(filename, info);
- found = 1;
+ /* continue if target and file are not the same type */
+ if(!is_dir == (file->name[flen - 1] == '/')) {
continue;
}

- if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
- pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"), root, pkgfile);
+ /* check basenames before we bother resolving the path */
+ char *pbname = mbasename(file->name);
+ if(pbname && strcmp(pbname, bname) != 0) {
+ free(pbname);
+ continue;
}
- /* concatenate our file and the root path */
- strcpy(path + rootlen, pkgfile);
+ free(pbname);

- pdname = mdirname(path);
- ppath = resolve_path(pdname);
- free(pdname);
+ /* resolve and compare the path */
+ strcpy(root + rootlen, file->name);
+ if(lrealpath(root, ppath) == NULL) {
+ pm_printf(ALPM_LOG_ERROR,
+ _("cannot determine real path for '%s': %s
"),
+ file->name, strerror(errno));
+ ret++;
+ continue;
+ };

- if(ppath && strcmp(ppath, rpath) == 0) {
- print_query_fileowner(filename, info);
+ if(strcmp(ppath, path) == 0) {
+ print_query_fileowner(path, info);
found = 1;
}
- free(ppath);
}
}
+
if(!found) {
- pm_printf(ALPM_LOG_ERROR, _("No package owns %s
"), filename);
+ pm_printf(ALPM_LOG_ERROR, _("No package owns %s
"), path);
ret++;
}
+
+ free(bname);
free(filename);
- free(rpath);
}

return ret;
diff --git a/src/pacman/util.c b/src/pacman/util.c
index 7f7f6a7..733da9f 100644
--- a/src/pacman/util.c
+++ b/src/pacman/util.c
@@ -211,45 +211,121 @@ int rmrf(const char *path)
}

/** Parse the basename of a program from a path.
+* Equivalent to posix basename except a path is never modified and the
+* returned string needs to be freed.
+*
* @param path path to parse basename from
*
-* @return everything following the final '/'
+* @return final component of a path
*/
-const char *mbasename(const char *path)
+char *mbasename(const char *path)
{
- const char *last = strrchr(path, '/');
- if(last) {
- return last + 1;
- }
- return path;
+ char *tmp = strdup(path);
+ char *bname = strdup(basename(tmp));
+ free(tmp);
+ return bname;
}

/** Parse the dirname of a program from a path.
-* The path returned should be freed.
+* Equivalent to posix dirname except a path is never modified and the
+* returned string needs to be freed.
+*
* @param path path to parse dirname from
*
-* @return everything preceding the final '/'
+* @return everything preceding the final component of a path
*/
char *mdirname(const char *path)
{
- char *ret, *last;
+ char *tmp = strdup(path);
+ char *dname = strdup(dirname(tmp));
+ free(tmp);
+ return dname;
+}

- /* null or empty path */
- if(path == NULL || path == '') {
- return strdup(".");
+/** Resolve a path to its canonicalized absolute pathname.
+* Equivalent to posix realpath except that if a path points to a symlink the
+* parent directory is resolved rather than the symlink itself.
+*
+* @param path path to resolve
+* @param resolved_path string to hold resolved path, will be malloc'd if NULL
+*
+* @return pointer to resolved_path on success, NULL on error
+*/
+char *lrealpath(const char *path, char *resolved_path)
+{
+ char *bname = NULL;
+ char *dname = NULL;
+ int need_free = 0; /* did we calloc resolved_path? */
+
+ bname = mbasename(path);
+ if(bname == NULL) {
+ goto error;
+ } else if(strcmp(bname, "..") == 0
+ || strcmp(bname, ".") == 0
+ || strcmp(bname, "/") == 0) {
+ /* handle a few special cases of bname */
+ dname = strdup(path);
+ bname = NULL;
+ free(bname);
+ } else {
+ dname = mdirname(path);
+ }
+ if(dname == NULL) {
+ goto error;
}

- ret = strdup(path);
- last = strrchr(ret, '/');
+ if(resolved_path == NULL) {
+ resolved_path = calloc(PATH_MAX, sizeof(char));
+ if(resolved_path == NULL) {
+ goto error;
+ }
+ need_free = 1;
+ }

- if(last != NULL) {
- /* we found a '/', so terminate our string */
- *last = '';
- return ret;
+ if(realpath(dname, resolved_path) == NULL) {
+ goto error;
}
- /* no slash found */
- free(ret);
- return strdup(".");
+
+ /* add bname back if we have a valid one */
+ if(bname) {
+ size_t rlen = strlen(resolved_path);
+
+ /* error out if resolved_path is too long to append bname */
+ if((rlen + strlen(bname) + 2) >= PATH_MAX) {
+ goto error;
+ }
+
+ /* append trailing '/' if needed */
+ if(resolved_path[rlen - 1] != '/') {
+ resolved_path[rlen] = '/';
+ resolved_path[rlen+1] = '';
+ rlen++;
+ }
+
+ strcpy(resolved_path + rlen, bname);
+ }
+
+ if(bname) {
+ free(bname);
+ }
+ if(dname) {
+ free(dname);
+ }
+
+ return resolved_path;
+
+error:
+ if(need_free) {
+ free(resolved_path);
+ }
+ if(bname) {
+ free(bname);
+ }
+ if(dname) {
+ free(dname);
+ }
+
+ return NULL;
}

/* output a string, but wrap words properly with a specified indentation
@@ -1661,9 +1737,9 @@ int pm_vfprintf(FILE *stream, alpm_loglevel_t level, const char *format, va_list
/* A quick and dirty implementation derived from glibc */
static size_t strnlen(const char *s, size_t max)
{
- register const char *p;
- for(p = s; *p && max--; ++p);
- return (p - s);
+ register const char *p;
+ for(p = s; *p && max--; ++p);
+ return (p - s);
}

char *strndup(const char *s, size_t n)
@@ -1672,7 +1748,7 @@ char *strndup(const char *s, size_t n)
char *new = (char *) malloc(len + 1);

if(new == NULL)
- return NULL;
+ return NULL;

new[len] = '';
return (char *)memcpy(new, s, len);
diff --git a/src/pacman/util.h b/src/pacman/util.h
index 0dfdc85..b51505a 100644
--- a/src/pacman/util.h
+++ b/src/pacman/util.h
@@ -23,6 +23,7 @@
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
+#include <libgen.h>

#include <alpm_list.h>

@@ -49,8 +50,9 @@ int needs_root(void);
int check_syncdbs(size_t need_repos, int check_valid);
unsigned short getcols(int fd);
int rmrf(const char *path);
-const char *mbasename(const char *path);
+char *mbasename(const char *path);
char *mdirname(const char *path);
+char *lrealpath(const char *path, char *resolved_path);
void indentprint(const char *str, unsigned short indent, unsigned short cols);
size_t strtrim(char *str);
char *strreplace(const char *str, const char *needle, const char *replace);
--
1.7.10.2

Andrew Gregory 09-06-2012 03:34 AM

Allow querying directory ownership
 
The restriction of not checking the ownership of a directory is
unnecessary given that all the package filelists contain this
information. Remove this restriction, with the expectation that you
might get multiple packages returned for a given directory.
Additionally attempt to minimise the number of files getting through
to the slow realpath call.

Original-work-by: Dan McGee <dan@archlinux.org>
Original-work-by: Allan McRae <allan@archlinux.org>
Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com>
---

Applies on top of my other -Qo patches queued in Allan's
working-split/query-owner branch.

src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 47 insertions(+), 14 deletions(-)

diff --git a/src/pacman/query.c b/src/pacman/query.c
index 92219e8..06505eb 100644
--- a/src/pacman/query.c
+++ b/src/pacman/query.c
@@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets)
size_t rootlen;
alpm_list_t *t;
alpm_db_t *db_local;
+ alpm_list_t *packages;

/* This code is here for safety only */
if(targets == NULL) {
@@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets)
}

db_local = alpm_get_localdb(config->handle);
+ packages = alpm_db_get_pkgcache(db_local);

for(t = targets; t; t = alpm_list_next(t)) {
char *filename = NULL, *dname = NULL, *rpath = NULL;
const char *bname;
struct stat buf;
alpm_list_t *i;
- size_t len;
+ size_t len, is_dir, bname_len, pbname_len;
int found = 0;

if((filename = strdup(t->data)) == NULL) {
@@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets)
}
}

- if(S_ISDIR(buf.st_mode)) {
- pm_printf(ALPM_LOG_ERROR,
- _("cannot determine ownership of directory '%s'
"), filename);
- goto targcleanup;
+ if((is_dir = S_ISDIR(buf.st_mode))) {
+ /* the entire filename is safe to resolve if we know it points to a dir,
+ * and it's necessary in case it ends in . or .. */
+ dname = realpath(filename, NULL);
+ bname = mbasename(dname);
+ rpath = mdirname(dname);
+ } else {
+ bname = mbasename(filename);
+ dname = mdirname(filename);
+ rpath = realpath(dname, NULL);
}

- bname = mbasename(filename);
- dname = mdirname(filename);
- rpath = realpath(dname, NULL);
+ bname_len = strlen(bname);
+ pbname_len = bname_len + (is_dir ? 1 : 0);

if(!dname || !rpath) {
pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
@@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets)
goto targcleanup;
}

- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
+ for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) {
alpm_pkg_t *info = i->data;
alpm_filelist_t *filelist = alpm_pkg_get_files(info);
size_t j;
@@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets)
const alpm_file_t *file = filelist->files + j;
char *ppath, *pdname;
const char *pkgfile = file->name;
+ size_t pkgfile_len = strlen(pkgfile);
+
+ /* make sure pkgfile and target are of the same type */
+ if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {
+ continue;
+ }
+
+ /* make sure pkgfile is long enough */
+ if(pkgfile_len < pbname_len) {
+ continue;
+ }

- /* avoid the costly realpath usage if the basenames don't match */
- if(strcmp(mbasename(pkgfile), bname) != 0) {
+ /* make sure pbname_len takes us to the start of a path component */
+ if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
+ continue;
+ }
+
+ /* compare basename with bname */
+ if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
continue;
}

/* concatenate our file and the root path */
- if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
+ if(rootlen + 1 + pkgfile_len > PATH_MAX) {
path[rootlen] = ''; /* reset path for error message */
pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"), path, pkgfile);
continue;
}
strcpy(path + rootlen, pkgfile);

- pdname = mdirname(path);
- ppath = realpath(pdname, NULL);
+ /* check that the file exists on disk and is the correct type */
+ if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {
+ continue;
+ }
+
+ if(is_dir) {
+ pdname = realpath(path, NULL);
+ ppath = mdirname(pdname);
+ } else {
+ pdname = mdirname(path);
+ ppath = realpath(pdname, NULL);
+ }
free(pdname);

if(!ppath) {
--
1.7.12


All times are GMT. The time now is 04:37 PM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.