FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > ArchLinux > ArchLinux Pacman Development

 
 
LinkBack Thread Tools
 
Old 09-07-2012, 07:29 AM
Allan McRae
 
Default Allow querying directory ownership

On 06/09/12 13:34, Andrew Gregory wrote:
> The restriction of not checking the ownership of a directory is
> unnecessary given that all the package filelists contain this
> information. Remove this restriction, with the expectation that you
> might get multiple packages returned for a given directory.
> Additionally attempt to minimise the number of files getting through
> to the slow realpath call.
>
> Original-work-by: Dan McGee <dan@archlinux.org>
> Original-work-by: Allan McRae <allan@archlinux.org>
> Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com>
> ---
>
> Applies on top of my other -Qo patches queued in Allan's
> working-split/query-owner branch.
>
> src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 47 insertions(+), 14 deletions(-)
>
> diff --git a/src/pacman/query.c b/src/pacman/query.c
> index 92219e8..06505eb 100644
> --- a/src/pacman/query.c
> +++ b/src/pacman/query.c
> @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets)
> size_t rootlen;
> alpm_list_t *t;
> alpm_db_t *db_local;
> + alpm_list_t *packages;
>
> /* This code is here for safety only */
> if(targets == NULL) {
> @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets)
> }
>
> db_local = alpm_get_localdb(config->handle);
> + packages = alpm_db_get_pkgcache(db_local);
>
> for(t = targets; t; t = alpm_list_next(t)) {
> char *filename = NULL, *dname = NULL, *rpath = NULL;
> const char *bname;
> struct stat buf;
> alpm_list_t *i;
> - size_t len;
> + size_t len, is_dir, bname_len, pbname_len;
> int found = 0;
>
> if((filename = strdup(t->data)) == NULL) {
> @@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets)
> }
> }
>
> - if(S_ISDIR(buf.st_mode)) {
> - pm_printf(ALPM_LOG_ERROR,
> - _("cannot determine ownership of directory '%s'
"), filename);
> - goto targcleanup;
> + if((is_dir = S_ISDIR(buf.st_mode))) {
> + /* the entire filename is safe to resolve if we know it points to a dir,
> + * and it's necessary in case it ends in . or .. */
> + dname = realpath(filename, NULL);
> + bname = mbasename(dname);
> + rpath = mdirname(dname);
> + } else {
> + bname = mbasename(filename);
> + dname = mdirname(filename);
> + rpath = realpath(dname, NULL);
> }
>
> - bname = mbasename(filename);
> - dname = mdirname(filename);
> - rpath = realpath(dname, NULL);
> + bname_len = strlen(bname);
> + pbname_len = bname_len + (is_dir ? 1 : 0);
>
> if(!dname || !rpath) {
> pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
> @@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets)
> goto targcleanup;
> }
>
> - for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
> + for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) {
> alpm_pkg_t *info = i->data;
> alpm_filelist_t *filelist = alpm_pkg_get_files(info);
> size_t j;
> @@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets)
> const alpm_file_t *file = filelist->files + j;
> char *ppath, *pdname;
> const char *pkgfile = file->name;
> + size_t pkgfile_len = strlen(pkgfile);
> +
> + /* make sure pkgfile and target are of the same type */
> + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {

!!! - literally.... How about:

if (is_dir && (pkgfile[pkgfile_len - 1] != '/'))

> + continue;
> + }
> +
> + /* make sure pkgfile is long enough */
> + if(pkgfile_len < pbname_len) {
> + continue;
> + }

If I understand this correctly, we are comparing the length of the final
component of the passed in parameter to the entire filename length from
the package. I think that it would be quite uncommon to hit the
continue here, so that can be removed.

>
> - /* avoid the costly realpath usage if the basenames don't match */
> - if(strcmp(mbasename(pkgfile), bname) != 0) {
> + /* make sure pbname_len takes us to the start of a path component */
> + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {

With the above removal:
if (pbname_len > pkgfile_len)

> + continue;
> + }
> +
> + /* compare basename with bname */
> + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {

Why an strncmp here? strcmp will do the same comparison...

> continue;
> }
>
> /* concatenate our file and the root path */
> - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
> + if(rootlen + 1 + pkgfile_len > PATH_MAX) {
> path[rootlen] = ''; /* reset path for error message */
> pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"), path, pkgfile);
> continue;
> }
> strcpy(path + rootlen, pkgfile);
>
> - pdname = mdirname(path);
> - ppath = realpath(pdname, NULL);
> + /* check that the file exists on disk and is the correct type */
> + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {

!!! again. How about:

if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode))

which is what I think is being tested here.

But I wonder if we really need this... I believe this is to avoid
"strange" results when people replace directories in a package with
symlinks to directories in another package. The more I think about
this, the more I want to just remove our special case handling of this
in general. A directory in a package and a symlink on the filesystem
should just be a conflict. So unless I am missing another reason for
this, get rid of it.

> + continue;
> + }
> +
> + if(is_dir) {
> + pdname = realpath(path, NULL);
> + ppath = mdirname(pdname);
> + } else {
> + pdname = mdirname(path);
> + ppath = realpath(pdname, NULL);
> + }
> free(pdname);

Umm... why calculate pdname just to free it immediately?

> if(!ppath) {
>
 
Old 09-07-2012, 11:06 AM
Menachem Moystoviz
 
Default Allow querying directory ownership

I'm not a dev, nor the original author of the patch, but...

On Fri, Sep 7, 2012 at 10:29 AM, Allan McRae <allan@archlinux.org> wrote:
> On 06/09/12 13:34, Andrew Gregory wrote:
>> The restriction of not checking the ownership of a directory is
>> unnecessary given that all the package filelists contain this
>> information. Remove this restriction, with the expectation that you
>> might get multiple packages returned for a given directory.
>> Additionally attempt to minimise the number of files getting through
>> to the slow realpath call.
>>
>> Original-work-by: Dan McGee <dan@archlinux.org>
>> Original-work-by: Allan McRae <allan@archlinux.org>
>> Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com>
>> ---
>>
>> Applies on top of my other -Qo patches queued in Allan's
>> working-split/query-owner branch.
>>
>> src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++-------------
>> 1 file changed, 47 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/pacman/query.c b/src/pacman/query.c
>> index 92219e8..06505eb 100644
>> --- a/src/pacman/query.c
>> +++ b/src/pacman/query.c
>> @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets)
>> size_t rootlen;
>> alpm_list_t *t;
>> alpm_db_t *db_local;
>> + alpm_list_t *packages;
>>
>> /* This code is here for safety only */
>> if(targets == NULL) {
>> @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets)
>> }
>>
>> db_local = alpm_get_localdb(config->handle);
>> + packages = alpm_db_get_pkgcache(db_local);
>>
>> for(t = targets; t; t = alpm_list_next(t)) {
>> char *filename = NULL, *dname = NULL, *rpath = NULL;
>> const char *bname;
>> struct stat buf;
>> alpm_list_t *i;
>> - size_t len;
>> + size_t len, is_dir, bname_len, pbname_len;
>> int found = 0;
>>
>> if((filename = strdup(t->data)) == NULL) {
>> @@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets)
>> }
>> }
>>
>> - if(S_ISDIR(buf.st_mode)) {
>> - pm_printf(ALPM_LOG_ERROR,
>> - _("cannot determine ownership of directory '%s'
"), filename);
>> - goto targcleanup;
>> + if((is_dir = S_ISDIR(buf.st_mode))) {
>> + /* the entire filename is safe to resolve if we know it points to a dir,
>> + * and it's necessary in case it ends in . or .. */
>> + dname = realpath(filename, NULL);
>> + bname = mbasename(dname);
>> + rpath = mdirname(dname);
>> + } else {
>> + bname = mbasename(filename);
>> + dname = mdirname(filename);
>> + rpath = realpath(dname, NULL);
>> }
>>
>> - bname = mbasename(filename);
>> - dname = mdirname(filename);
>> - rpath = realpath(dname, NULL);
>> + bname_len = strlen(bname);
>> + pbname_len = bname_len + (is_dir ? 1 : 0);
>>
>> if(!dname || !rpath) {
>> pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
>> @@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets)
>> goto targcleanup;
>> }
>>
>> - for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
>> + for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) {
>> alpm_pkg_t *info = i->data;
>> alpm_filelist_t *filelist = alpm_pkg_get_files(info);
>> size_t j;
>> @@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets)
>> const alpm_file_t *file = filelist->files + j;
>> char *ppath, *pdname;
>> const char *pkgfile = file->name;
>> + size_t pkgfile_len = strlen(pkgfile);
>> +
>> + /* make sure pkgfile and target are of the same type */
>> + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {
>
> !!! - literally.... How about:
>
> if (is_dir && (pkgfile[pkgfile_len - 1] != '/'))
This isn't the same check - Andrew's check verifies that is_dir XOR
(pkgfile's last character is not '/' (i.e. pkgfile names a file
path)),
is true, while your test is for AND. This way of defining XOR is quite
confusing, though - isn't there a clearer way? (i.e. macros, inline
functions?)
>
>> + continue;
>> + }
>> +
>> + /* make sure pkgfile is long enough */
>> + if(pkgfile_len < pbname_len) {
>> + continue;
>> + }
>
> If I understand this correctly, we are comparing the length of the final
> component of the passed in parameter to the entire filename length from
> the package. I think that it would be quite uncommon to hit the
> continue here, so that can be removed.
>
I think you mean to say that we replace this if(){continue;} with
if(!){...} wrapping the rest of the loop?
That would make sense if this branch is costly.
>>
>> - /* avoid the costly realpath usage if the basenames don't match */
>> - if(strcmp(mbasename(pkgfile), bname) != 0) {
>> + /* make sure pbname_len takes us to the start of a path component */
>> + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
>
> With the above removal:
> if (pbname_len > pkgfile_len)
>
>> + continue;
>> + }
>> +
>> + /* compare basename with bname */
>> + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
>
> Why an strncmp here? strcmp will do the same comparison...
>
In order to make sure that we don't run into buffer overflow issues -
http://stackoverflow.com/questions/448563/am-i-correct-that-strcmp-is-equivalent-and-safe-for-literals
>> continue;
>> }
>>
>> /* concatenate our file and the root path */
>> - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
>> + if(rootlen + 1 + pkgfile_len > PATH_MAX) {
>> path[rootlen] = ''; /* reset path for error message */
>> pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"), path, pkgfile);
>> continue;
>> }
>> strcpy(path + rootlen, pkgfile);
>>
>> - pdname = mdirname(path);
>> - ppath = realpath(pdname, NULL);
>> + /* check that the file exists on disk and is the correct type */
>> + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {
>
> !!! again. How about:
>
> if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode))
>
> which is what I think is being tested here.
Again, your fix doesn't match the original test - Andrew's test
verifies that the file either doesn't exist
or that (the file is a directory) XOR is_dir is true.
>
> But I wonder if we really need this... I believe this is to avoid
> "strange" results when people replace directories in a package with
> symlinks to directories in another package. The more I think about
> this, the more I want to just remove our special case handling of this
> in general. A directory in a package and a symlink on the filesystem
> should just be a conflict. So unless I am missing another reason for
> this, get rid of it.
>
No comment, as I do not know the codebase well enough to respond.
>> + continue;
>> + }
>> +
>> + if(is_dir) {
>> + pdname = realpath(path, NULL);
>> + ppath = mdirname(pdname);
>> + } else {
>> + pdname = mdirname(path);
>> + ppath = realpath(pdname, NULL);
>> + }
>> free(pdname);
>
> Umm... why calculate pdname just to free it immediately?
Since we need to use it to calculate ppath?
>
>> if(!ppath) {
>>
>
>

Overall, I can't really judge the quality of Andrew's patch, but I definitely
do not agree with the comments given by Allan.

HTH in my own way,

Gesh
 
Old 09-07-2012, 11:56 AM
Allan McRae
 
Default Allow querying directory ownership

On 07/09/12 21:06, Menachem Moystoviz wrote:
> I'm not a dev, nor the original author of the patch, but...
>
> On Fri, Sep 7, 2012 at 10:29 AM, Allan McRae <allan@archlinux.org> wrote:
>> On 06/09/12 13:34, Andrew Gregory wrote:
>>> The restriction of not checking the ownership of a directory is
>>> unnecessary given that all the package filelists contain this
>>> information. Remove this restriction, with the expectation that you
>>> might get multiple packages returned for a given directory.
>>> Additionally attempt to minimise the number of files getting through
>>> to the slow realpath call.
>>>
>>> Original-work-by: Dan McGee <dan@archlinux.org>
>>> Original-work-by: Allan McRae <allan@archlinux.org>
>>> Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com>
>>> ---
>>>
>>> Applies on top of my other -Qo patches queued in Allan's
>>> working-split/query-owner branch.
>>>
>>> src/pacman/query.c | 61 +++++++++++++++++++++++++++++++++++++++++-------------
>>> 1 file changed, 47 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/src/pacman/query.c b/src/pacman/query.c
>>> index 92219e8..06505eb 100644
>>> --- a/src/pacman/query.c
>>> +++ b/src/pacman/query.c
>>> @@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets)
>>> size_t rootlen;
>>> alpm_list_t *t;
>>> alpm_db_t *db_local;
>>> + alpm_list_t *packages;
>>>
>>> /* This code is here for safety only */
>>> if(targets == NULL) {
>>> @@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets)
>>> }
>>>
>>> db_local = alpm_get_localdb(config->handle);
>>> + packages = alpm_db_get_pkgcache(db_local);
>>>
>>> for(t = targets; t; t = alpm_list_next(t)) {
>>> char *filename = NULL, *dname = NULL, *rpath = NULL;
>>> const char *bname;
>>> struct stat buf;
>>> alpm_list_t *i;
>>> - size_t len;
>>> + size_t len, is_dir, bname_len, pbname_len;
>>> int found = 0;
>>>
>>> if((filename = strdup(t->data)) == NULL) {
>>> @@ -163,15 +165,20 @@ static int query_fileowner(alpm_list_t *targets)
>>> }
>>> }
>>>
>>> - if(S_ISDIR(buf.st_mode)) {
>>> - pm_printf(ALPM_LOG_ERROR,
>>> - _("cannot determine ownership of directory '%s'
"), filename);
>>> - goto targcleanup;
>>> + if((is_dir = S_ISDIR(buf.st_mode))) {
>>> + /* the entire filename is safe to resolve if we know it points to a dir,
>>> + * and it's necessary in case it ends in . or .. */
>>> + dname = realpath(filename, NULL);
>>> + bname = mbasename(dname);
>>> + rpath = mdirname(dname);
>>> + } else {
>>> + bname = mbasename(filename);
>>> + dname = mdirname(filename);
>>> + rpath = realpath(dname, NULL);
>>> }
>>>
>>> - bname = mbasename(filename);
>>> - dname = mdirname(filename);
>>> - rpath = realpath(dname, NULL);
>>> + bname_len = strlen(bname);
>>> + pbname_len = bname_len + (is_dir ? 1 : 0);
>>>
>>> if(!dname || !rpath) {
>>> pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
>>> @@ -179,7 +186,7 @@ static int query_fileowner(alpm_list_t *targets)
>>> goto targcleanup;
>>> }
>>>
>>> - for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
>>> + for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) {
>>> alpm_pkg_t *info = i->data;
>>> alpm_filelist_t *filelist = alpm_pkg_get_files(info);
>>> size_t j;
>>> @@ -188,22 +195,48 @@ static int query_fileowner(alpm_list_t *targets)
>>> const alpm_file_t *file = filelist->files + j;
>>> char *ppath, *pdname;
>>> const char *pkgfile = file->name;
>>> + size_t pkgfile_len = strlen(pkgfile);
>>> +
>>> + /* make sure pkgfile and target are of the same type */
>>> + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {
>>
>> !!! - literally.... How about:
>>
>> if (is_dir && (pkgfile[pkgfile_len - 1] != '/'))
> This isn't the same check - Andrew's check verifies that is_dir XOR
> (pkgfile's last character is not '/' (i.e. pkgfile names a file
> path)),
> is true, while your test is for AND. This way of defining XOR is quite
> confusing, though - isn't there a clearer way? (i.e. macros, inline
> functions?)

Ah - that is confusing... obviously!

So a clearer version of the test would be:

if ((!is_dir && pkgfile[pkgfile_len - 1] == '/') ||
(is_dir && pkgfile[pkgfile_len - 1] != '/'))

>>
>>> + continue;
>>> + }
>>> +
>>> + /* make sure pkgfile is long enough */
>>> + if(pkgfile_len < pbname_len) {
>>> + continue;
>>> + }
>>
>> If I understand this correctly, we are comparing the length of the final
>> component of the passed in parameter to the entire filename length from
>> the package. I think that it would be quite uncommon to hit the
>> continue here, so that can be removed.
>>
> I think you mean to say that we replace this if(){continue;} with
> if(!){...} wrapping the rest of the loop?
> That would make sense if this branch is costly.

No. I was saying that I think this test is going to result in a very
small number of cases being skipped and so it would be more efficient to
just not perform it.

>>>
>>> - /* avoid the costly realpath usage if the basenames don't match */
>>> - if(strcmp(mbasename(pkgfile), bname) != 0) {
>>> + /* make sure pbname_len takes us to the start of a path component */
>>> + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
>>
>> With the above removal:
>> if (pbname_len > pkgfile_len)
>>
>>> + continue;
>>> + }
>>> +
>>> + /* compare basename with bname */
>>> + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
>>
>> Why an strncmp here? strcmp will do the same comparison...
>>
> In order to make sure that we don't run into buffer overflow issues -
> http://stackoverflow.com/questions/448563/am-i-correct-that-strcmp-is-equivalent-and-safe-for-literals

Both strings are null terminated.

>>> continue;
>>> }
>>>
>>> /* concatenate our file and the root path */
>>> - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
>>> + if(rootlen + 1 + pkgfile_len > PATH_MAX) {
>>> path[rootlen] = ''; /* reset path for error message */
>>> pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"), path, pkgfile);
>>> continue;
>>> }
>>> strcpy(path + rootlen, pkgfile);
>>>
>>> - pdname = mdirname(path);
>>> - ppath = realpath(pdname, NULL);
>>> + /* check that the file exists on disk and is the correct type */
>>> + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {
>>
>> !!! again. How about:
>>
>> if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode))
>>
>> which is what I think is being tested here.
> Again, your fix doesn't match the original test - Andrew's test
> verifies that the file either doesn't exist
> or that (the file is a directory) XOR is_dir is true.
>>
>> But I wonder if we really need this... I believe this is to avoid
>> "strange" results when people replace directories in a package with
>> symlinks to directories in another package. The more I think about
>> this, the more I want to just remove our special case handling of this
>> in general. A directory in a package and a symlink on the filesystem
>> should just be a conflict. So unless I am missing another reason for
>> this, get rid of it.
>>
> No comment, as I do not know the codebase well enough to respond.
>>> + continue;
>>> + }
>>> +
>>> + if(is_dir) {
>>> + pdname = realpath(path, NULL);
>>> + ppath = mdirname(pdname);
>>> + } else {
>>> + pdname = mdirname(path);
>>> + ppath = realpath(pdname, NULL);
>>> + }
>>> free(pdname);
>>
>> Umm... why calculate pdname just to free it immediately?
> Since we need to use it to calculate ppath?

Oops... I blame doing quick review before rushing to the train!

>>
>>> if(!ppath) {
>>>
>>
>>
>
> Overall, I can't really judge the quality of Andrew's patch, but I definitely
> do not agree with the comments given by Allan.
>
> HTH in my own way,
>
> Gesh
>
>
>
 
Old 09-07-2012, 02:34 PM
Andrew Gregory
 
Default Allow querying directory ownership

On Fri, 07 Sep 2012 21:56:48 +1000
Allan McRae <allan@archlinux.org> wrote:

> On 07/09/12 21:06, Menachem Moystoviz wrote:
> > I'm not a dev, nor the original author of the patch, but...
> >
> > On Fri, Sep 7, 2012 at 10:29 AM, Allan McRae <allan@archlinux.org> wrote:
> >> On 06/09/12 13:34, Andrew Gregory wrote:

<snip>

> >>> +
> >>> + /* make sure pkgfile and target are of the same type */
> >>> + if(!is_dir != !(pkgfile[pkgfile_len - 1] == '/')) {
> >>
> >> !!! - literally.... How about:
> >>
> >> if (is_dir && (pkgfile[pkgfile_len - 1] != '/'))
> > This isn't the same check - Andrew's check verifies that is_dir XOR
> > (pkgfile's last character is not '/' (i.e. pkgfile names a file
> > path)),
> > is true, while your test is for AND. This way of defining XOR is quite
> > confusing, though - isn't there a clearer way? (i.e. macros, inline
> > functions?)
>
> Ah - that is confusing... obviously!
>
> So a clearer version of the test would be:
>
> if ((!is_dir && pkgfile[pkgfile_len - 1] == '/') ||
> (is_dir && pkgfile[pkgfile_len - 1] != '/'))
>

Yeah, the !!! xor can be a bit confusing if you're not familiar with
it, but I dislike long multi-line ifs. I can just force is_dir to 1
or 0 and compare them directly:

is_dir = S_ISDIR(buf.st_mode) ? 1 : 0;

...

if(is_dir != (pkgfile[pkgfile_len - 1] == '/'))...

> >>
> >>> + continue;
> >>> + }
> >>> +
> >>> + /* make sure pkgfile is long enough */
> >>> + if(pkgfile_len < pbname_len) {
> >>> + continue;
> >>> + }
> >>
> >> If I understand this correctly, we are comparing the length of the final
> >> component of the passed in parameter to the entire filename length from
> >> the package. I think that it would be quite uncommon to hit the
> >> continue here, so that can be removed.
> >>
> > I think you mean to say that we replace this if(){continue;} with
> > if(!){...} wrapping the rest of the loop?
> > That would make sense if this branch is costly.

This is a safety to make sure that we don't try to dereference a
negative index in the next if.

>
> No. I was saying that I think this test is going to result in a very
> small number of cases being skipped and so it would be more efficient to
> just not perform it.
>
> >>>
> >>> - /* avoid the costly realpath usage if the basenames don't match */
> >>> - if(strcmp(mbasename(pkgfile), bname) != 0) {
> >>> + /* make sure pbname_len takes us to the start of a path component */
> >>> + if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
> >>
> >> With the above removal:
> >> if (pbname_len > pkgfile_len)
> >>

This check must be != because it is specifically looking for pbname_len
either taking us to the beginning of the string or the beginning of a
path component, ie:

pkgfile == "basename"

or

pkgfile == "foo/bar/basename"

but not

pkgfile == "foo/barbasename"

It could be combined (not replaced) with the check in the previous if
statement, but that gets a little ugly, so I left them separate.

> >>> + continue;
> >>> + }
> >>> +
> >>> + /* compare basename with bname */
> >>> + if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
> >>
> >> Why an strncmp here? strcmp will do the same comparison...
> >>
> > In order to make sure that we don't run into buffer overflow issues -
> > http://stackoverflow.com/questions/448563/am-i-correct-that-strcmp-is-equivalent-and-safe-for-literals
>
> Both strings are null terminated.
>

strncmp is needed because if it's a directory pkgfile will end in '/'
but bname will not.

> >>> continue;
> >>> }
> >>>
> >>> /* concatenate our file and the root path */
> >>> - if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
> >>> + if(rootlen + 1 + pkgfile_len > PATH_MAX) {
> >>> path[rootlen] = ''; /* reset path for error message */
> >>> pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"), path, pkgfile);
> >>> continue;
> >>> }
> >>> strcpy(path + rootlen, pkgfile);
> >>>
> >>> - pdname = mdirname(path);
> >>> - ppath = realpath(pdname, NULL);
> >>> + /* check that the file exists on disk and is the correct type */
> >>> + if(lstat(path, &buf) == -1 || !is_dir != !S_ISDIR(buf.st_mode)) {
> >>
> >> !!! again. How about:
> >>
> >> if(is_dir && lstat(path, &buf) == 0 && !S_ISDIR(buf.st_mode))
> >>
> >> which is what I think is being tested here.
> > Again, your fix doesn't match the original test - Andrew's test
> > verifies that the file either doesn't exist
> > or that (the file is a directory) XOR is_dir is true.
> >>
> >> But I wonder if we really need this... I believe this is to avoid
> >> "strange" results when people replace directories in a package with
> >> symlinks to directories in another package. The more I think about
> >> this, the more I want to just remove our special case handling of this
> >> in general. A directory in a package and a symlink on the filesystem
> >> should just be a conflict. So unless I am missing another reason for
> >> this, get rid of it.
> >>
> > No comment, as I do not know the codebase well enough to respond.

I didn't really like this in the first place; deleted.

> >>> + continue;
> >>> + }
> >>> +
> >>> + if(is_dir) {
> >>> + pdname = realpath(path, NULL);
> >>> + ppath = mdirname(pdname);
> >>> + } else {
> >>> + pdname = mdirname(path);
> >>> + ppath = realpath(pdname, NULL);
> >>> + }
> >>> free(pdname);
> >>
> >> Umm... why calculate pdname just to free it immediately?
> > Since we need to use it to calculate ppath?
>
> Oops... I blame doing quick review before rushing to the train!
>

I copied this particular bit from above where I do the same thing to
the target, but it's unnecessary work here. A simple strncpy will
actually suffice:

strncpy(path + rootlen, pkgfile, pkgfile_len - pbname_len);
path[rootlen + pkgfile_len - pbname_len] = '';
ppath = realpath(path, NULL);

> >>
> >>> if(!ppath) {
> >>>
> >>
> >>
> >
> > Overall, I can't really judge the quality of Andrew's patch, but I definitely
> > do not agree with the comments given by Allan.
> >
> > HTH in my own way,
> >
> > Gesh
> >
> >
> >
>
>
 
Old 09-08-2012, 09:14 PM
Andrew Gregory
 
Default Allow querying directory ownership

The restriction of not checking the ownership of a directory is
unnecessary given that all the package filelists contain this
information. Remove this restriction, with the expectation that you
might get multiple packages returned for a given directory.
Additionally attempt to minimise the number of files getting through
to the slow realpath call.

Original-work-by: Dan McGee <dan@archlinux.org>
Original-work-by: Allan McRae <allan@archlinux.org>
Signed-off-by: Andrew Gregory <andrew.gregory.8@gmail.com>
---
src/pacman/query.c | 60 +++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 41 insertions(+), 19 deletions(-)

diff --git a/src/pacman/query.c b/src/pacman/query.c
index 92219e8..4b6f429 100644
--- a/src/pacman/query.c
+++ b/src/pacman/query.c
@@ -97,6 +97,7 @@ static int query_fileowner(alpm_list_t *targets)
size_t rootlen;
alpm_list_t *t;
alpm_db_t *db_local;
+ alpm_list_t *packages;

/* This code is here for safety only */
if(targets == NULL) {
@@ -129,13 +130,14 @@ static int query_fileowner(alpm_list_t *targets)
}

db_local = alpm_get_localdb(config->handle);
+ packages = alpm_db_get_pkgcache(db_local);

for(t = targets; t; t = alpm_list_next(t)) {
char *filename = NULL, *dname = NULL, *rpath = NULL;
const char *bname;
struct stat buf;
alpm_list_t *i;
- size_t len;
+ size_t len, is_dir, bname_len, pbname_len;
int found = 0;

if((filename = strdup(t->data)) == NULL) {
@@ -163,15 +165,21 @@ static int query_fileowner(alpm_list_t *targets)
}
}

- if(S_ISDIR(buf.st_mode)) {
- pm_printf(ALPM_LOG_ERROR,
- _("cannot determine ownership of directory '%s'
"), filename);
- goto targcleanup;
+ is_dir = S_ISDIR(buf.st_mode) ? 1 : 0;
+ if(is_dir) {
+ /* the entire filename is safe to resolve if we know it points to a dir,
+ * and it's necessary in case it ends in . or .. */
+ dname = realpath(filename, NULL);
+ bname = mbasename(dname);
+ rpath = mdirname(dname);
+ } else {
+ bname = mbasename(filename);
+ dname = mdirname(filename);
+ rpath = realpath(dname, NULL);
}

- bname = mbasename(filename);
- dname = mdirname(filename);
- rpath = realpath(dname, NULL);
+ bname_len = strlen(bname);
+ pbname_len = bname_len + is_dir;

if(!dname || !rpath) {
pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
@@ -179,33 +187,47 @@ static int query_fileowner(alpm_list_t *targets)
goto targcleanup;
}

- for(i = alpm_db_get_pkgcache(db_local); i && !found; i = alpm_list_next(i)) {
+ for(i = packages; i && (!found || is_dir); i = alpm_list_next(i)) {
alpm_pkg_t *info = i->data;
alpm_filelist_t *filelist = alpm_pkg_get_files(info);
size_t j;

for(j = 0; j < filelist->count; j++) {
const alpm_file_t *file = filelist->files + j;
- char *ppath, *pdname;
+ char *ppath;
const char *pkgfile = file->name;
+ size_t pkgfile_len = strlen(pkgfile);
+
+ /* make sure pkgfile and target are of the same type */
+ if(is_dir != (pkgfile[pkgfile_len - 1] == '/')) {
+ continue;
+ }
+
+ /* make sure pkgfile is long enough */
+ if(pkgfile_len < pbname_len) {
+ continue;
+ }
+
+ /* make sure pbname_len takes us to the start of a path component */
+ if(pbname_len != pkgfile_len && pkgfile[pkgfile_len - pbname_len - 1] != '/') {
+ continue;
+ }

- /* avoid the costly realpath usage if the basenames don't match */
- if(strcmp(mbasename(pkgfile), bname) != 0) {
+ /* compare basename with bname */
+ if(strncmp(pkgfile + pkgfile_len - pbname_len, bname, bname_len) != 0) {
continue;
}

- /* concatenate our file and the root path */
- if(rootlen + 1 + strlen(pkgfile) > PATH_MAX) {
+ /* concatenate our dirname and the root path */
+ if(rootlen + 1 + pkgfile_len - pbname_len > PATH_MAX) {
path[rootlen] = ''; /* reset path for error message */
pm_printf(ALPM_LOG_ERROR, _("path too long: %s%s
"), path, pkgfile);
continue;
}
- strcpy(path + rootlen, pkgfile);
-
- pdname = mdirname(path);
- ppath = realpath(pdname, NULL);
- free(pdname);
+ strncpy(path + rootlen, pkgfile, pkgfile_len - pbname_len);
+ path[rootlen + pkgfile_len - pbname_len] = '';

+ ppath = realpath(path, NULL);
if(!ppath) {
pm_printf(ALPM_LOG_ERROR, _("cannot determine real path for '%s': %s
"),
path, strerror(errno));
--
1.7.12
 

Thread Tools




All times are GMT. The time now is 09:41 PM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org